#####################################################################
#####         Replicate all data analyses in main paper:        #####
#####       "The Aggregate Costs of Political Connections"      #####
#####           Date created: 12.09.2025, Jonas Gathen          #####
#####################################################################

#################################
###### 1. Data preparation ###### 
#################################

##### Libraries #####

# markdown & base packages
library(bookdown)
library(tidyverse)
options(dplyr.summarise.inform = FALSE) # to hide summarise info
library(knitr)
library(haven)

# plotting packages
library(ggridges)
library(gridExtra)
library(ggnewscale)
library(ggpubr)
library(extrafont) 
loadfonts()

# table package
library(kableExtra)
library(gtools)

# extra matrix operations
library(matrixStats)

# nonlinear roots
library(nleqslv)
library(lbfgsb3c)
library(optimParallel)

# paralellization 
library(foreach)
library(doParallel)

# for unobserved heterogeneity
library(mvtnorm)

# for generalizes simulated annealing solver
library(GenSA)

##### Main parameters #####

# Stationary case: interest rate fixed by HH preferences
beta <- 0.95
r <- 1/beta - 1
delta <- 0.1
R <- r + delta
wage <- 1
P <- 1.0 

# tax system 
vat_tax <- 0.1 
profit_tax <- 0.2 

##### Load data #####

# Load data
firms_1997 <- read_csv(file = "Empirical Analysis/output/firms_1997_revised.csv")

# get intermediates & their cost shares 
firms_1997 <- firms_1997 %>% 
  mutate(intermediates = routs - rvads) # assume that VA output is observed net of VAT 

# exit analyses (CHECK IF ITS ENOUGH TO LOAD firms_1997)
firms <- read_csv(file = "Empirical Analysis/output/firms_panel.csv")

###########################################################
###### Section 2: Political Connections in Indonesia ###### 
###########################################################

########### Differences between connected and non-connected firms ###########

####### Figure 1 (gross output) #######

# Get number of observations
nrow_non_connected <- nrow(firms_1997[!is.na(firms_1997$routs) & firms_1997$connected == "non-connected",])
nrow_connected <- nrow(firms_1997[!is.na(firms_1997$routs) & firms_1997$connected == "connected",])

# Dispersion
sd(firms_1997$routs[firms_1997$connected == "connected"])/mean(firms_1997$routs[firms_1997$connected == "connected"])
sd(firms_1997$routs[firms_1997$connected == "non-connected"])/mean(firms_1997$routs[firms_1997$connected == "non-connected"])

# Real gross output
firms_1997 %>% 
  mutate(connections = fct_rev(as.factor(connected)), 
         connected = case_when(
           connected == "connected" ~ "C", 
           connected == "non-connected" ~ "NC")) %>% 
  ggplot(aes(y = connected, group = connected, fill = connected)) + 
  geom_density_ridges(
    aes(x = routs/9040, # exchange rate with USD in 2010 (in 000's)
        fill = connections),
    alpha = 0.8, color = "black", quantile_lines = TRUE, quantiles = 2, show.legend = TRUE) + 
  scale_x_continuous(name = "Sales (in 000' USD)", trans = "log2",
                     breaks = c(1,10,100,1000,10000,100000,1000000),
                     labels = c("1","10","100","1k","10k","100k","1mio"),
                     limits = c(2.5,2500000)) + ylab("") + 
  theme_classic(base_size = 14, base_family = "LM Roman 10") + 
  scale_fill_discrete(name = "Type:") + 
  theme(legend.position = "bottom")

# PDF
ggsave("Paper writing/Revision JPE Macro/Final paper/Publication Figures/Figure1.pdf", width = 6, height = 4, units = "in")


####### Table 1 (gross output) #######

# unconditional ratio: mean ratio of 12
unconditional_ratio <- (firms_1997 %>% group_by(connected) %>% 
                          summarize(mean_routs = mean(routs)) %>% ungroup() %>%
                          summarize(ratio = first(mean_routs)/last(mean_routs)))$ratio[1]

#### Within industry size ratios ####

# conditional on 2digit sector: mean ratio of 12.62
ratio_2digit <- (firms_1997 %>% group_by(connected,industry_2digit) %>% 
                   summarize(
                     n_obs = n(),
                     mean_routs = mean(routs)
                   ) %>% ungroup() %>% 
                   # delete groups without connected firms
                   group_by(industry_2digit) %>%
                   mutate(connected_present = (n() == 2)) %>% ungroup() %>%
                   filter(connected_present == TRUE) %>% 
                   # now aggregate
                   group_by(industry_2digit) %>% 
                   summarize(
                     n_obs = first(n_obs),
                     ratio = first(mean_routs)/last(mean_routs)
                   ) %>% ungroup() %>%
                   summarize(
                     ratio = sum(ratio*n_obs)/nrow_connected
                   ))$ratio[1]

# conditional on 3digit sector: mean ratio of 11
ratio_3digit <- (firms_1997 %>% group_by(connected,industry_3digit) %>% 
                   summarize(
                     n_obs = n(),
                     mean_routs = mean(routs)
                   ) %>% ungroup() %>% 
                   # delete groups without connected firms
                   group_by(industry_3digit) %>%
                   mutate(connected_present = (n() == 2)) %>% ungroup() %>%
                   filter(connected_present == TRUE) %>% 
                   # now aggregate
                   group_by(industry_3digit) %>%
                   summarize(
                     n_obs = first(n_obs),
                     ratio = first(mean_routs)/last(mean_routs)
                   ) %>% ungroup() %>%
                   summarize(
                     ratio = sum(ratio*n_obs)/nrow_connected
                   ))$ratio[1]

# conditional on 4digit sector: mean ratio of 9.44
ratio_4digit <- (firms_1997 %>% group_by(connected,industry_4digit) %>% 
                   summarize(
                     n_obs = n(),
                     mean_routs = mean(routs)
                   ) %>% ungroup() %>% 
                   # delete groups without connected firms
                   group_by(industry_4digit) %>%
                   mutate(connected_present = (n() == 2)) %>% ungroup() %>%
                   filter(connected_present == TRUE) %>% 
                   # now aggregate
                   group_by(industry_4digit) %>%
                   summarize(
                     n_obs = first(n_obs),
                     ratio = first(mean_routs)/last(mean_routs)
                   ) %>% ungroup() %>%
                   summarize(
                     ratio = sum(ratio*n_obs)/nrow_connected
                   ))$ratio[1]

# conditional on 5digit sector: mean ratio of 15
ratio_5digit <- (firms_1997 %>% group_by(connected,PROD5D) %>% 
                   summarize(
                     n_obs = n(),
                     mean_routs = mean(routs)
                   ) %>% ungroup() %>% 
                   # delete groups without connected firms
                   group_by(PROD5D) %>%
                   mutate(connected_present = (n() == 2)) %>% ungroup() %>%
                   filter(connected_present == TRUE) %>% 
                   # now aggregate
                   group_by(PROD5D) %>%
                   summarize(
                     n_obs = first(n_obs),
                     ratio = first(mean_routs)/last(mean_routs)
                   ) %>% ungroup() %>%
                   summarize(
                     ratio = sum(ratio*n_obs)/nrow_connected
                   ))$ratio[1]

#### Save Table: Summary size ratios ####

table_size_ratios <- tibble(
  # variables
  ` ` = c("Ratio","# industries","# industries w/ connected firm"),
  
  # unconditional ratio
  `unconditional` = c(round(unconditional_ratio,2),
                      paste(round(length(unique(firms_1997$industry_1digit)),0)),
                      paste(round(length(unique(firms_1997$industry_1digit[firms_1997$connected == "connected"])),0))
  ),
  
  # within industry
  `2-digit` = c(round(ratio_2digit,2),
                paste(round(length(unique(firms_1997$industry_2digit)),0)),
                paste(round(length(unique(firms_1997$industry_2digit[firms_1997$connected == "connected"])),0))
  ),
  
  `3-digit` = c(round(ratio_3digit,2),
                paste(round(length(unique(firms_1997$industry_3digit)),0)),
                paste(round(length(unique(firms_1997$industry_3digit[firms_1997$connected == "connected"])),0))
  ),
  
  `4-digit` = c(round(ratio_4digit,2),
                paste(round(length(unique(firms_1997$industry_4digit)),0)),
                paste(round(length(unique(firms_1997$industry_4digit[firms_1997$connected == "connected"])),0))
  ),
  
  `5-digit` = c(round(ratio_5digit,2),
                paste(round(length(unique(firms_1997$PROD5D)),0)),
                paste(round(length(unique(firms_1997$PROD5D[firms_1997$connected == "connected"])),0))
  )
)

write_csv(x = table_size_ratios, file = "Paper writing/Revision JPE Macro/Final paper/Publication Tables/Table1.csv")

####### Descriptives on distribution of connected firms across industries #######

# Dispersion across industries

## 2-digit
length(unique(firms_1997$industry_2digit[firms_1997$connected == "connected"]))
length(unique(firms_1997$industry_2digit[firms_1997$connected == "non-connected"]))

## 3-digit
length(unique(firms_1997$industry_3digit[firms_1997$connected == "connected"]))
length(unique(firms_1997$industry_3digit[firms_1997$connected == "non-connected"]))

## 4-digit
length(unique(firms_1997$industry_4digit[firms_1997$connected == "connected"]))
length(unique(firms_1997$industry_4digit[firms_1997$connected == "non-connected"]))


####### Figure A.1 (value-added in Appendix) #######

# Dispersion in value added: non-connected are 4x more dispersed 
sd(firms_1997$rvads[firms_1997$connected == "connected"])/mean(firms_1997$rvads[firms_1997$connected == "connected"])
sd(firms_1997$rvads[firms_1997$connected == "non-connected"])/mean(firms_1997$rvads[firms_1997$connected == "non-connected"])

# Real value-added output
firms_1997 %>% 
  mutate(connections = fct_rev(as.factor(connected)), 
         connected = case_when(
           connected == "connected" ~ "C", 
           connected == "non-connected" ~ "NC")) %>%
  ggplot(aes(y = connected, group = connected, fill = connected)) + 
  geom_density_ridges(
    aes(x = rvads/9040, # exchange rate with USD in 2010 (in 000's)
        fill = connections),
    alpha = 0.8, color = "black", quantile_lines = TRUE, quantiles = 2, show.legend = TRUE) + 
  scale_x_continuous(name = "(real) value-added production output (in '000 USD)", trans = "log2",
                     breaks = c(1,10,100,1000,10000,100000,1000000),
                     labels = c("1","10","100","1k","10k","100k","1mio"),
                     limits = c(0.25,2500000)) + ylab("") + 
  theme_classic(base_size = 14, base_family = "LM Roman 10") + 
  scale_fill_discrete(name = "Type:") + 
  theme(legend.position = "bottom")

############################################################
###### Section 3: Quantifying the role of connections ###### 
############################################################

####### TFPQ distribution #######

#### Preparation: get gross output elasticities & TFPQ measure ####

## Step 1: gross output elasticities 

# for this, make sure to create flexible FEs first & get all input shares 
firms_1997 <- firms_1997 %>% 
  mutate(estyear_FE = ntile(x = estyear, n = 15), 
         capital_share = capital_bill/routs, 
         labor_share = wage_bill/routs, 
         intermediate_share = intermediates/routs 
         )

# make sure that input cost shares are reasonable by winsorizing ends!  
firms_1997 <- firms_1997 %>% 
  mutate(
    capital_share = case_when(
      capital_share < quantile(capital_share, probs = 0.1) ~ unname(quantile(capital_share, probs = 0.1)),
      capital_share > quantile(capital_share, probs = 0.9) ~ unname(quantile(capital_share, probs = 0.9)),
      .default = capital_share),
    labor_share = case_when(
      labor_share < quantile(labor_share, probs = 0.1, na.rm = T) ~ unname(quantile(labor_share, probs = 0.1, na.rm = T)),
      labor_share > quantile(labor_share, probs = 0.9, na.rm = T) ~ unname(quantile(labor_share, probs = 0.9, na.rm = T)),
      .default = labor_share)
  ) %>% 
  # make sure that joint shares are not above 1 (would be inconsistent with model) 
  mutate(
    joint_share = capital_share + labor_share + intermediate_share, 
    # winsorize at bottom & downweight those whose joint share > 1.0
    capital_share = case_when(
      joint_share < quantile(joint_share, probs = 0.1, na.rm = T) ~ (capital_share/joint_share)*unname(quantile(joint_share, probs = 0.1, na.rm = T)), 
      joint_share > 1.0 ~ capital_share/joint_share, 
      .default = capital_share),
    labor_share = case_when(
      joint_share < quantile(joint_share, probs = 0.1, na.rm = T) ~ (labor_share/joint_share)*unname(quantile(joint_share, probs = 0.1, na.rm = T)), 
      joint_share > 1.0 ~ labor_share/joint_share,
      .default = labor_share),
    intermediate_share = case_when(
      joint_share > 1.0 ~ intermediate_share/joint_share,
      .default = intermediate_share)
  )

# get gross elasticities
gross_elasticities <- firms_1997 %>% filter(connected == "non-connected") %>% 
  summarise(
    med_cap_share_1digit_gross = median(capital_share,na.rm = T),
    med_lab_share_1digit_gross = median(labor_share,na.rm = T),
    med_mat_share_1digit_gross = median(intermediate_share,na.rm = T))
write_csv(x = gross_elasticities, file = "Computation/data/gross_elasticities.csv")

# save elasticities 
alpha_gross <- gross_elasticities$med_cap_share_1digit_gross/(1-vat_tax)
beta_gross <- gross_elasticities$med_lab_share_1digit_gross/(1-vat_tax)
gamma_gross <- gross_elasticities$med_mat_share_1digit_gross

eta_tilde_baseline <- alpha_gross + beta_gross + gamma_gross

## Residualize expenditure shares (for later) 

## Capital 

# residualize capital share
OLS_resid_capital_share <- fixest::feols(
  fml = capital_share ~ 1 | prov + state_owned_major + state_owned_partly + estyear_FE + industry_4digit,
  data = firms_1997)

# get residualized capital share 
firms_1997$capital_share_resid <- firms_1997$capital_share - predict(object = OLS_resid_capital_share, newdata = firms_1997) + mean(firms_1997$capital_share)


## Labor 

# residualize labor share
OLS_resid_labor_share <- fixest::feols(
  fml = labor_share ~ 1 | prov + state_owned_major + state_owned_partly + estyear_FE + industry_4digit,
  data = firms_1997)

# get residualized labor share 
firms_1997$labor_share_resid <- firms_1997$labor_share - predict(object = OLS_resid_labor_share, newdata = firms_1997) + mean(firms_1997$labor_share, na.rm = T)

## Intermediates 

# residualize intermediate share
OLS_resid_intermediate_share <- fixest::feols(
  fml = intermediate_share ~ 1 | prov + state_owned_major + state_owned_partly + estyear_FE + industry_4digit,
  data = firms_1997)

# get residualized intermediate share 
firms_1997$intermediate_share_resid <- firms_1997$intermediate_share - predict(object = OLS_resid_intermediate_share, newdata = firms_1997) + mean(firms_1997$intermediate_share, na.rm = T)

### Compute TFPQ 

# compute TFPQ using gross output first 
firms_1997 <- firms_1997 %>% 
  mutate(
    log_TFPQ_gross = log(routs) - alpha_gross*log(alpha_gross*(1-vat_tax)*routs/R) - beta_gross*log(beta_gross*(1-vat_tax)*routs) - gamma_gross*log(gamma_gross*routs), 
    TFPQ = exp(log_TFPQ_gross))

#### Residualize TFPQ #### 

## What about residualizing log TFPQ? 

# residualize TFPQ (Residualize also by capital & labor shares?)
OLS_resid_TFPQ <- fixest::feols(
  fml = log_TFPQ_gross ~ 1 | prov + state_owned_major + state_owned_partly + estyear_FE + industry_4digit, #
  data = firms_1997)

# get residualized TFPQ measure 
firms_1997$log_TFPQ_resid <- firms_1997$log_TFPQ_gross - predict(object = OLS_resid_TFPQ, newdata = firms_1997) + mean(firms_1997$log_TFPQ_gross)
# firms_1997$TFPQ_resid <- firms_1997$TFPQ - predict(object = OLS_resid_TFPQ, newdata = firms_1997) + mean(firms_1997$TFPQ)
firms_1997$TFPQ_resid <- exp(firms_1997$log_TFPQ_resid)

#### Figure 2 (left panel): residualized TFPQ distributions (gross output) #######

# TFPQ distribution 
plot_TFPQ_distrib <- firms_1997 %>% 
  mutate(
    connections = fct_rev(as.factor(connected)), 
    connected = case_when(
      connected == "connected" ~ "C", 
      connected == "non-connected" ~ "NC")) %>% 
  ggplot(aes(y = connected, group = connected, fill = connected)) + 
  geom_density_ridges(
    aes(x = log_TFPQ_resid, # divide by 9040 to show in 2010 USD
        fill = connections),
    alpha = 0.8, color = "black", quantile_lines = TRUE, quantiles = 2, 
    show.legend = TRUE, bandwidth = 0.075) + 
  labs(
    x = "log TFPQ",
    y = ""
  ) + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_fill_discrete(name = "Type:") + 
  theme(legend.position = "bottom")

#### Figure 2 (right panel): TFPQ_QR distribution implied by resid TFPQ ####

# Get baseline TFPQ_QR function
compute_baseline_TFPQ_QR <- function(data_connect,data_nonconnect, cutoff){
  
  # Arrange from low to high
  data_connect <- data_connect %>%
    arrange(TFPQ_resid)
  
  # get rows of connected firms 
  n_connect <- nrow(data_connect)
  
  ## First get restricted productivity distribution
  
  ### Need to have specified cutoff! ### 
  
  # Now restrict normally 
  restricted_productivity_distribution <- data_nonconnect %>% 
    # only look at TFPQ
    select(c(TFPQ_resid)) %>% 
    # order by TFPQ
    arrange(TFPQ_resid) %>% 
    # drop cutoff*% of observations
    dplyr::slice(round(cutoff*n()):n())
  
  restricted_productivity_distribution <- restricted_productivity_distribution$TFPQ_resid
  
  # Now get quantiles (weighted by rho if supplied)
  z_connected <- unname(quantile(
    x = restricted_productivity_distribution,
    probs = c(1:n_connect)/(n_connect+1),
  ))
  TFPQ_QR_connected <- data_connect$TFPQ_resid/z_connected - 1 
  
  # return results
  return(list(productivity = z_connected, TFPQ_QR = TFPQ_QR_connected))
}

# Get bootstrapping TFPQ_QR function 
compute_bs_baseline_TFPQ_QR <- function(data_connect,data_nonconnect, cutoff){
  
  # create productivity variable
  data_connect$productivity <- NA
  
  # Arrange from high to low
  data_connect <- data_connect %>%
    arrange(desc(TFPQ_resid))
  
  ## First get restricted productivity distribution
  
  # Now restrict normally 
  restricted_productivity_distribution <- data_nonconnect %>% 
    # only look at TFPR
    select(c(TFPQ_resid)) %>% 
    # order by TFPR
    arrange(TFPQ_resid) %>% 
    # drop cutoff*% of observations
    dplyr::slice(round(cutoff*n()):n())
  
  ## Now, draw with replacement from restricted productivity distribution
  
  n_draws <- nrow(data_connect)
  
  # set seeds
  set.seed(123456)
  n_bootstraps = 10000
  bootstrap_seeds = rnorm(n = n_bootstraps, mean = 1000, sd = 100)
  
  # Get sampling function 
  take_sample <- Vectorize(function(data,seed,n_draws){
    set.seed(seed)
    return(sort(sample(x = data, size = n_draws, replace = TRUE), decreasing = T))
  }, vectorize.args = c("seed"))
  
  # bootstrap samples
  bs_productivities <- take_sample(data = restricted_productivity_distribution$TFPQ_resid, 
                                   seed = bootstrap_seeds, n_draws = n_draws)
  
  bs_TFPQ_QR <- data_connect$TFPQ_resid/bs_productivities - 1
  
  # return results
  return(bs_TFPQ_QR)
}

# baseline TFPQ_QR (without fixed cost)
baseline_TFPQ_QR_noF <- compute_baseline_TFPQ_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = 0.0)

baseline_TFPQ_QR_noF_bs <- compute_bs_baseline_TFPQ_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = 0.0)

# baseline TFPQ_QR (max fixed cost)
max_cutoff <- (ecdf(firms_1997$TFPQ_resid[firms_1997$connected == "non-connected"]))(
  min(firms_1997$TFPQ_resid[firms_1997$connected == "connected"])
)

baseline_TFPQ_QR_maxF <- compute_baseline_TFPQ_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = max_cutoff)

baseline_TFPQ_QR_maxF_bs <- compute_bs_baseline_TFPQ_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = max_cutoff)

## Summarize bootstrapped confidence bands  
CI_baseline_TFPQ_QR_bs_upper <- tibble(
  productivity_order = c(241:1),
  `2.5%` = t(apply(baseline_TFPQ_QR_noF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,1],
  `97.5%` = t(apply(baseline_TFPQ_QR_noF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,2]
) %>% pivot_longer(cols = c(`2.5%`,`97.5%`), names_to = "Percentiles", values_to = "TFPQ_QR")

CI_baseline_TFPQ_QR_bs_lower <- tibble(
  productivity_order = c(241:1),
  `2.5%` = t(apply(baseline_TFPQ_QR_maxF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,1],
  `97.5%` = t(apply(baseline_TFPQ_QR_maxF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,2]
) %>% pivot_longer(cols = c(`2.5%`,`97.5%`), names_to = "Percentiles", values_to = "TFPQ_QR")


# Plot estimated TFPQ_QR schedule  
plot_baseline_TFPQ_QR_bound <- ggplot() + 
  geom_line(data = CI_baseline_TFPQ_QR_bs_upper, aes(x = (productivity_order/241)*100, y = TFPQ_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_line(data = CI_baseline_TFPQ_QR_bs_lower, aes(x = (productivity_order/241)*100, y = TFPQ_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_line(aes(x = (c(1:241)/241)*100, y = baseline_TFPQ_QR_noF$TFPQ_QR, color = "Upper"), linewidth = 1) + 
  geom_line(aes(x = (c(1:241)/241)*100, y = baseline_TFPQ_QR_maxF$TFPQ_QR, color = "Lower"), linewidth = 1) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "TFPQ Percentile") + 
  scale_color_discrete(name = "Bound:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom") + 
  ylim(c(0,0.65))

#### Figure 2 joint plot #### 

ggarrange(
  plot_TFPQ_distrib + labs(caption="(A) TFPQ distributions C vs NC") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_baseline_TFPQ_QR_bound + labs(caption="(B) TFPQ quantile ratio (with bounds)") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  
ggsave(width = 12, height = 5, dpi = 200,
       "Paper writing/Revision JPE Macro/Final paper/Publication Figures/Figure2.pdf") 

################################
###### Firm exit analyses ######
################################

####### Create exit variable #######

firms <- firms %>% 
  arrange(PSID,year) %>% 
  group_by(PSID) %>% 
  mutate(
    # exit 
    max_year = max(year), 
    year_next = lead(year), 
    exit_rob = year == max_year, 
    exit_alt = is.na(year_next) | year_next > year + 1, 
    # entry 
    min_year = min(year),
    entry = year == min_year) %>% 
  ungroup() 

# restrict to 1997
firms_exit <- firms %>% filter(year == 1997)

# look at exit probability of connected vs non-connected firms in 1997
firms_exit %>%
  group_by(connected) %>% 
  summarise(exit_rate = mean(exit_rob)) ## No exit of connected firms!!! 

# should see about 18 connected firms exiting (on average), if assumption was right!! 
# But no connected firm exits!! 

# Alternative exit measure: still only minimal exit among connected firms! 
firms_exit %>% 
  group_by(connected) %>% 
  summarise(exit_rate = mean(exit_alt))

# Less entry of connected vs non-connected firms in 1997
firms_exit %>% 
  group_by(connected) %>% 
  summarise(entry_rate = mean(entry))


####### TFPQ dynamics (survivors & not) #######

### restrict to years 1997 & 1998 
firms_dynamics <- firms %>% filter(year %in% c(1997,1998))

### Step 1: construct (raw) TFPQ 
firms_dynamics <- firms_dynamics %>% 
  mutate(
    log_TFPQ_gross = log(routs) - alpha_gross*log(alpha_gross*routs/R) - beta_gross*log(beta_gross*routs) - gamma_gross*log(gamma_gross*routs), 
    TFPQ = exp(log_TFPQ_gross)
    )

### Step 2: take out time FEs ()

# check: TFPQ is slightly lower on average, log TFPQ is almost the same!
firms_dynamics %>% group_by(year) %>% 
  summarise(
    median_TFPQ = median(TFPQ),
    mean_TFPQ = mean(TFPQ)
  )

# residualize by time (same as future residualizing: do this in logs, not levels) 
firms_dynamics <- firms_dynamics %>% 
  group_by(year) %>% 
  mutate(avg_log_TFPQ_year = mean(log_TFPQ_gross)) %>% 
  ungroup() %>% 
  mutate(log_TFPQ_time_resid = log_TFPQ_gross - avg_log_TFPQ_year + first(avg_log_TFPQ_year[year == 1997]))

# Now look at TFPQ dynamics 
firms_dynamics <- firms_dynamics %>% 
  group_by(PSID) %>% 
  mutate(log_TFPQ_next = lead(log_TFPQ_time_resid, n = 1, order_by = year)) %>% 
  ungroup() 

# restrict to survivors only 
firms_dynamics_survivors <- firms_dynamics %>% filter(!is.na(log_TFPQ_next))

# make sure have all the fixed effects needed 
firms_dynamics_survivors <- left_join(x = firms_dynamics_survivors, 
                                      y = firms_1997 %>% select(PSID,estyear_FE))

# fully residualize TFPQ for survivors (both for TFPQ & TFPQ next)
firms_dynamics_survivors$log_TFPQ_resid <- firms_dynamics_survivors$log_TFPQ_gross - predict(object = OLS_resid_TFPQ, newdata = firms_dynamics_survivors) + mean(firms_dynamics$log_TFPQ_time_resid)
firms_dynamics_survivors$log_TFPQ_resid_next <- firms_dynamics_survivors$log_TFPQ_next - predict(
  object = OLS_resid_TFPQ, 
  # make sure that use the right TFPQ measure (TFPQ next)
  newdata = firms_dynamics_survivors %>% 
    select(log_TFPQ_next,prov,state_owned_major,state_owned_partly,estyear_FE,industry_4digit) %>% 
    rename(log_TFPQ_gross = log_TFPQ_next)
  ) + mean(firms_dynamics$log_TFPQ_time_resid)

# extract z_star for non-connected survivors 
NC_survivors <- firms_dynamics_survivors %>% filter(connected == "non-connected")
log_z_star_NC_survivors <- NC_survivors$log_TFPQ_resid
log_z_star_next_NC_survivors <- NC_survivors$log_TFPQ_resid_next

## Note: z_star_next looks actually slightly worse (!) than z_star for NC survivors 


####### Robustness: TFPQ dynamics 1996-1997 & Crisis #######

### Start with showing effects of Asian Financial Crisis ### 
firms %>% filter(year %in% c(1990:2003) & !is.na(routs)) %>% 
  group_by(year) %>% 
  mutate(total_output = sum(routs), 
         total_VA = sum(rvads)) %>% 
  ungroup() %>% 
  mutate(total_output = total_output/first(total_output[year == 1997]),
         total_VA = total_VA/first(total_VA[year == 1997])) %>% 
  ggplot() + 
  geom_line(aes(x = year, y = total_output, color = "Total sales")) + 
  geom_line(aes(x = year, y = total_VA, color = "Total value-added")) + 
  geom_vline(xintercept = 1998, linetype = "dashed") + 
  ylab(label = "") + 
  xlab(label = "Year") + 
  annotate(geom = "text", label = "First crisis year",
           x = 1998,
           y = 0.5, 
           hjust = -0.1) + 
  # geom_text(aes(x=1998, label=, y=0.7)) + 
  scale_color_discrete(name = "Measure:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom") + 
  scale_x_continuous(breaks = seq(1990, 2002, by = 2), 
                     labels = c("'90","'92","'94","'96",
                                "'98","'00","'02"))
# ggsave(width = 6, height = 4, "Paper writing/Revision JPE Macro/paper revision round 2/Figures/output-evolution-asian-crisis.png")


### Next, look at TFPQ dynamics from 1996-1997 instead ### 

### restrict to years 1996 & 1997 
firms_dynamics_rob96 <- firms %>% filter(year %in% c(1996,1997))

### Step 1: construct (raw) TFPQ 
firms_dynamics_rob96 <- firms_dynamics_rob96 %>% 
  mutate(
    log_TFPQ_gross = log(routs) - alpha_gross*log(alpha_gross*routs/R) - beta_gross*log(beta_gross*routs) - gamma_gross*log(gamma_gross*routs), 
    TFPQ = exp(log_TFPQ_gross)
  )

### Step 2: take out time FEs ()

# check: TFPQ grows slightly over time 
firms_dynamics_rob96 %>% group_by(year) %>% 
  summarise(
    median_TFPQ = median(TFPQ),
    mean_TFPQ = mean(TFPQ)
  )

# residualize by time (same as future residualizing: do this in logs, not levels) 
firms_dynamics_rob96 <- firms_dynamics_rob96 %>% 
  group_by(year) %>% 
  mutate(avg_log_TFPQ_year = mean(log_TFPQ_gross)) %>% 
  ungroup() %>% 
  mutate(log_TFPQ_time_resid = log_TFPQ_gross - avg_log_TFPQ_year + first(avg_log_TFPQ_year[year == 1997]))

# Now look at TFPQ dynamics 
firms_dynamics_rob96 <- firms_dynamics_rob96 %>% 
  group_by(PSID) %>% 
  mutate(log_TFPQ_next = lead(log_TFPQ_time_resid, n = 1, order_by = year)) %>% 
  ungroup() 

# restrict to survivors only 
firms_dynamics_rob96_survivors <- firms_dynamics_rob96 %>% filter(!is.na(log_TFPQ_next))

# make sure have all the fixed effects needed 
firms_dynamics_rob96_survivors <- left_join(x = firms_dynamics_rob96_survivors, 
                                      y = firms_1997 %>% select(PSID,estyear_FE))

# fully residualize TFPQ for survivors (both for TFPQ & TFPQ next)
firms_dynamics_rob96_survivors$log_TFPQ_resid <- firms_dynamics_rob96_survivors$log_TFPQ_gross - predict(object = OLS_resid_TFPQ, newdata = firms_dynamics_rob96_survivors) + mean(firms_dynamics_rob96$log_TFPQ_time_resid)
firms_dynamics_rob96_survivors$log_TFPQ_resid_next <- firms_dynamics_rob96_survivors$log_TFPQ_next - predict(
  object = OLS_resid_TFPQ, 
  # make sure that use the right TFPQ measure (TFPQ next)
  newdata = firms_dynamics_rob96_survivors %>% 
    select(log_TFPQ_next,prov,state_owned_major,state_owned_partly,estyear_FE,industry_4digit) %>% 
    rename(log_TFPQ_gross = log_TFPQ_next)
) + mean(firms_dynamics_rob96$log_TFPQ_time_resid)

# extract z_star for non-connected survivors 
NC_survivors_rob96 <- firms_dynamics_rob96_survivors %>% filter(connected == "non-connected")

# In data, run regression of log(TFPQ post) on log(TFPQ pre) & collect regression coefficients 
lm_prod_process_rob96 <- lm(formula = log_TFPQ_resid_next ~ log_TFPQ_resid, data = NC_survivors_rob96)

# Moments are 2 coefficients & variance 
lm_prod_process_rob96$coefficients
var(lm_prod_process_rob96$residuals)

## How do these estimates differ? Intercept is quite a bit larger (almost 2x), Var & persistence are both slightly smaller

####### Robustness: Exit dynamics 1996-1997 & Crisis #######

### Check exit probabilities over time ### 
firms %>% filter(year %in% c(1990:2003) & !is.na(routs)) %>% 
  group_by(year) %>% 
  mutate(mean_exit = mean(exit_rob)) %>% 
  ungroup() %>% 
  ggplot() + 
  geom_line(aes(x = year, y = mean_exit)) + 
  geom_vline(xintercept = 1998, linetype = "dashed") + 
  ylab(label = "Avg Exit Rate") + 
  xlab(label = "Year") + 
  annotate(geom = "text", label = "First crisis year",
           x = 1998,
           y = 0.12, 
           hjust = -0.1) + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom") + 
  ylim(c(0.0,0.15)) + 
  scale_x_continuous(breaks = seq(1990, 2002, by = 2), 
                     labels = c("'90","'92","'94","'96",
                                "'98","'00","'02"))

# ggsave(width = 6, height = 4, "Paper writing/Revision JPE Macro/paper revision round 2/Figures/exit-rate-evolution-asian-crisis.png")

## Alternative data moments for exit for earlier years 

## create firms_1996_rob
firms_1996_rob <- firms %>% filter(year == 1996)

# for this, make sure to create flexible FEs first
firms_1996_rob <- firms_1996_rob %>% 
  mutate(estyear_FE = ntile(x = estyear, n = 15))

# compute TFPQ using gross output (use same coefficients as for 1997) 
firms_1996_rob <- firms_1996_rob %>% 
  mutate(
    log_TFPQ_gross = log(routs) - alpha_gross*log(alpha_gross*(1-vat_tax)*routs/R) - beta_gross*log(beta_gross*(1-vat_tax)*routs) - gamma_gross*log(gamma_gross*routs), 
    TFPQ = exp(log_TFPQ_gross))

# residualize TFPQ (Residualize also by capital & labor shares?)
OLS_resid_TFPQ_rob96 <- fixest::feols(
  fml = log_TFPQ_gross ~ 1 | prov + state_owned_major + state_owned_partly + estyear_FE + industry_4digit, #
  data = firms_1996_rob)

# get residualized TFPQ measure 
firms_1996_rob$log_TFPQ_resid <- firms_1996_rob$log_TFPQ_gross - predict(object = OLS_resid_TFPQ_rob96, newdata = firms_1996_rob) + mean(firms_1996_rob$log_TFPQ_gross)

# first residualize exit, then run regression 
# residualize exit
OLS_resid_exit_rob96 <- fixest::feols(
  fml = exit_rob ~ 1 | prov + state_owned_major + state_owned_partly + estyear_FE + industry_4digit, #
  data = firms_1996_rob) 

# get residualized exit measure 
firms_1996_rob$exit_resid <- firms_1996_rob$exit_rob - predict(object = OLS_resid_exit_rob96, newdata = firms_1996_rob) + mean(firms_1996_rob$exit_rob)

# Use this regression as target!! 
reg_firms_NC_exit_resid_rob96 <- lm(
  data = firms_1996_rob %>% filter(connected == "non-connected"),
  formula = exit_resid ~ log_TFPQ_resid)
summary(reg_firms_NC_exit_resid_rob96)


#################################
###### Estimation of model ######
#################################

################ Baseline results Connections Technology ################

#### Prepare objects & set grid length ####

# x_star (needed to move between z_star to z_tilde)
x_star_baseline <- ( (((1-vat_tax)*alpha_gross/R)^alpha_gross)*(((1-vat_tax)*beta_gross/wage)^beta_gross)*((gamma_gross/P)^gamma_gross) )

# create z_tilde_function
z_tilde_baseline <- function(z_star){
  return( (z_star*x_star_baseline)^(1/(1-eta_tilde_baseline)) )
}

# specify grid length for dense and non-dense
grid_length <- 100
grid_dense_length <- 1000

# targeted moment 
empirical_QR_baseline <- baseline_TFPQ_QR_noF$TFPQ_QR

# Extract z_star_NC
z_star_NC_baseline <- firms_1997$TFPQ_resid[firms_1997$connected == "non-connected"]

# get density distribution over z_star_NC
z_star_marginal_distrib_baseline <- approxfun(density(log(z_star_NC_baseline)))(log(z_star_NC_baseline))

# get quantiles of non-connected 
quantiles_non_connected_TFPQ_data_baseline <- unname(quantile(
  x = firms_1997$TFPQ_resid[firms_1997$connected == "non-connected"],
  probs = c(1:nrow_connected)/(nrow_connected+1),
))  

# specify grid here!! Want to range over z_i for non-connected firms (don't need very many points!) 
z_star_grid_baseline <- seq(from = unname(quantile(z_star_NC_baseline,probs = 0.0)), 
                   to = unname(quantile(z_star_NC_baseline,probs = 1)), 
                   length.out = grid_length)

z_star_grid_dense_baseline <- seq(from = unname(quantile(z_star_NC_baseline,probs = 0.0)), 
                         to = unname(quantile(z_star_NC_baseline,probs = 1)), 
                         length.out = grid_dense_length)

z_tilde_grid_baseline <- z_tilde_baseline(z_star = z_star_grid_baseline)

z_tilde_grid_dense_baseline <- z_tilde_baseline(z_star = z_star_grid_dense_baseline)

# get density distribution over z_star grids 
z_star_grid_marginal_distrib_baseline <- approxfun(density(z_star_NC_baseline))(z_star_grid_baseline)
z_star_grid_marginal_distrib_baseline <- z_star_grid_marginal_distrib_baseline/sum(z_star_grid_marginal_distrib_baseline)

z_star_grid_dense_marginal_distrib_baseline <- approxfun(density(z_star_NC_baseline))(z_star_grid_dense_baseline)
z_star_grid_dense_marginal_distrib_baseline <- z_star_grid_dense_marginal_distrib_baseline/sum(z_star_grid_dense_marginal_distrib_baseline)

# need this for later
lowest_TFPQ_connected_baseline <- min(firms_1997$TFPQ_resid[firms_1997$connected == "connected"])

# elasticity ratio
elasticity_ratio_baseline <- eta_tilde_baseline/(1-eta_tilde_baseline)

### Create z_star vectors 

# z_star grid long 
z_star_grid_long_baseline <- (crossing(
  z_star = z_star_grid_baseline,
  epsilon = c(1:grid_length)))$z_star 

# z_star grid dense long 
z_star_grid_dense_long_baseline <- (crossing(
  z_star = z_star_grid_dense_baseline,
  epsilon = c(1:grid_dense_length)))$z_star 

# also make z star grid marginal distrib long
z_star_grid_dense_marginal_distrib_long_baseline <- approxfun(density(z_star_NC_baseline))(z_star_grid_dense_long_baseline)

# can create z_tilde outside the loop
z_tilde_grid_long_baseline <- z_tilde_baseline(z_star = z_star_grid_long_baseline)
z_tilde_grid_dense_long_baseline <- z_tilde_baseline(z_star = z_star_grid_dense_long_baseline)

# can create revenue & profits NC outside the loop 
revenue_NC_z_star_grid_long_baseline <- z_tilde_grid_long_baseline
profits_NC_z_star_grid_long_baseline <- (1-profit_tax)*(1-eta_tilde_baseline)*z_tilde_grid_long_baseline

revenue_NC_z_star_grid_dense_long_baseline <- z_tilde_grid_dense_long_baseline
profits_NC_z_star_grid_dense_long_baseline <- (1-profit_tax)*(1-eta_tilde_baseline)*z_tilde_grid_dense_long_baseline

# can also create revenue NC outside the loop

# get targeted moment: connections TFPQ distribution  
quantiles_connected_TFPQ_data_baseline <- sort(firms_1997$TFPQ_resid[firms_1997$connected == "connected"])

# express fixed cost in terms of share of NC profits at quantile cutoff 
quantile_cutoff_baseline <- (ecdf(firms_1997$TFPQ_resid[firms_1997$connected == "non-connected"]))(min(firms_1997$TFPQ_resid[firms_1997$connected == "connected"]))
fixed_cost_profit_normalization <- profits_NC_z_star_grid_long_baseline[which.min(x = abs(z_star_grid_long_baseline - quantile(z_star_NC_baseline, probs = quantile_cutoff_baseline)))]


#### Create functions to solve for optimal subsidies over grid (create both DRS & DRS+convex) ####

# for DRS model
compute_subsidy_DRS_eps_model_grid <- function(z_star_grid, epsilon_grid, z_tilde_grid, theta, elasticity_ratio = elasticity_ratio_baseline, eta_tilde = eta_tilde_baseline){
  
  #### Step 1: Preparation ####
  
  # specify FOC 
  rent_seeking_FOC <- function(m_R,z_tilde_value,epsilon){
    return( z_tilde_value*( (1+epsilon*((m_R)^(theta)))^elasticity_ratio )*theta*epsilon*((m_R)^(theta-1)) - P )
  }
  
  # for lower bound of root finding: use something close to zero 
  lower_bound <- 1e-32
  
  # pick high number for upper bound 
  upper_bound <- 1e+32
  
  #### Step 2: Solve for m_R* using FOCs ####
  
  get_optimal_m_R <- Vectorize(FUN = function(epsilon,z_tilde_value){
    
    # Compute optimal m_R
    optimal_m_R <- (uniroot(
      f = function(x){rent_seeking_FOC(m_R = x, z_tilde_value = z_tilde_value, epsilon = epsilon)},
      interval = c(lower_bound,upper_bound),
      extendInt = "downX", # extend interval in case initial doesnt work (in case upper bound not large enough?)
      tol = 0.0001,
      maxiter = 10000))$root
    
    # return optimal m_R
    return(optimal_m_R)
  }, vectorize.args = c("epsilon","z_tilde_value"))
  
  optimal_m_R <- get_optimal_m_R(epsilon = epsilon_grid, z_tilde_value = z_tilde_grid)
  
  #### Step 3: Solve for remaining objects needed ####
  
  final_results <- tibble(z_star = z_star_grid,
                          epsilon = epsilon_grid,
                          optimal_m_R = optimal_m_R,
                          optimal_subsidy = epsilon_grid*(optimal_m_R^theta),
                          optimal_revenue = z_tilde_grid*( (1 + optimal_subsidy )^(elasticity_ratio + 1) ),
                          optimal_profits_C = (1-profit_tax)*((1-eta_tilde)*optimal_revenue - optimal_m_R),
                          optimal_profits_NC = (1-profit_tax)*(1-eta_tilde)*z_tilde_grid,
                          TFPQ_model = (1+optimal_subsidy)*z_star_grid,
                          revenue_output = z_tilde_grid*((1+optimal_subsidy)^elasticity_ratio)
  )
  
  #### Return final results ####
  
  return(final_results)
}

# for DRS+convex model 
compute_subsidy_DRS_eps_cost_model_grid <- function(z_star_grid, epsilon_grid, z_tilde_grid, parameters, elasticity_ratio = elasticity_ratio_baseline, eta_tilde = eta_tilde_baseline){
  
  #### Step 1: Preparation ####
  
  # get parameters 
  theta <- unname(parameters["theta"])
  cost_level <- unname(parameters["cost_level"])
  cost_elast <- unname(parameters["cost_elasticity"])
  fixed_cost <- fixed_cost_profit_normalization*unname(parameters["fixed_cost"])
  
  # specify FOC 
  rent_seeking_FOC <- function(m_R,z_tilde_value,epsilon){
    return( z_tilde_value*( (1+(epsilon*((m_R)^(theta)) - cost_level*(m_R^cost_elast)))^elasticity_ratio )*( (theta*epsilon*((m_R)^(theta-1)) - cost_level*cost_elast*(m_R^(cost_elast-1))) ) - 1 )
  }
  
  # for lower bound of root finding: use something close to zero 
  lower_bound <- 1e-32
  
  #### Step 2: Solve for m_R* using FOCs ####
  
  get_optimal_m_R <- Vectorize(FUN = function(epsilon,z_tilde_value){
    
    # upper_bound: can pick this as max (never want to spend beyond max subsidy (without constraints))
    upper_bound <- ((theta*epsilon)/(cost_level*cost_elast))^(1/(cost_elast - theta))
    
    # Compute optimal m_R (if epsilon < cost_level, then optimum between 0 & 1 and can just set to max subsidy given low cost)
    optimal_m_R <- (uniroot(
      f = function(x){rent_seeking_FOC(m_R = x, z_tilde_value = z_tilde_value, epsilon = epsilon)},
      interval = c(lower_bound,upper_bound),
      extendInt = "downX", # "downX" extend interval in case initial doesnt work (in case upper bound not large enough?)
      tol = 0.00001,
      maxiter = 10000))$root
    
    # return optimal m_R
    return(optimal_m_R)
  }, vectorize.args = c("epsilon","z_tilde_value"))
  
  optimal_m_R <- get_optimal_m_R(epsilon = epsilon_grid, z_tilde_value = z_tilde_grid)
  
  #### Step 3: Solve for remaining objects needed ####
  
  final_results <- tibble(z_star = z_star_grid,
                          epsilon = epsilon_grid,
                          z_tilde = z_tilde_grid,
                          optimal_m_R = optimal_m_R,
                          optimal_subsidy = epsilon_grid*(optimal_m_R^theta) - cost_level*(optimal_m_R^cost_elast),
                          optimal_revenue = z_tilde_grid*( (1 + optimal_subsidy )^(elasticity_ratio + 1) ),
                          optimal_profits_C = (1-profit_tax)*((1-eta_tilde)*optimal_revenue - optimal_m_R),
                          optimal_profits_NC = (1-profit_tax)*(1-eta_tilde)*z_tilde_grid,
                          TFPQ_model = (1+optimal_subsidy)*z_star_grid) 
  
  final_results <- final_results %>% 
    mutate(
      # update based on fixed cost & selection into connections
      choose_C = optimal_profits_C - fixed_cost > optimal_profits_NC,
      optimal_subsidy = ifelse(choose_C,optimal_subsidy,0.0),
      optimal_m_R = ifelse(choose_C, optimal_m_R, 0.0),
      optimal_revenue = ifelse(choose_C, optimal_revenue, z_tilde),
      optimal_profits = ifelse(choose_C, optimal_profits_C - (1-profit_tax)*fixed_cost, optimal_profits_NC),
      TFPQ_model = (1+optimal_subsidy)*z_star, 
      revenue_output_C = z_tilde_grid*((1+optimal_subsidy)^elasticity_ratio), 
      revenue_output_NC = z_tilde_grid
    )
  
  #### Return final results ####
  
  return(final_results)
}

#### Functions to get joint distribution of subsidies over (z*,eps) ####

# Compute subsidy with DRS technology 
compute_subsidy_DRS_eps_model_joint <- function(parameters, n_sampling = 10000, 
                                                z_star_NC = z_star_NC_baseline, z_star_grid_dense = z_star_grid_dense_baseline, 
                                                z_star_grid_dense_long = z_star_grid_dense_long_baseline, 
                                                z_tilde_grid_dense_long = z_tilde_grid_dense_long_baseline, 
                                                z_star_grid_dense_marginal_distrib_long = z_star_grid_dense_marginal_distrib_long_baseline,
                                                revenue_NC_z_star_grid_dense_long = revenue_NC_z_star_grid_dense_long_baseline,
                                                elasticity_ratio = elasticity_ratio_baseline, eta_tilde = eta_tilde_baseline, n_connect = nrow_connected,
                                                empirical_QR = empirical_QR_baseline){
  
  ##### Step 0: Prepare data given parameters #####
  
  print(parameters)
  
  # get fixed cost into right level 
  fixed_cost <- fixed_cost_profit_normalization*parameters["fixed_cost"]
  
  ### get good range for epsilon (simulating min and max)
  
  # Maximum (already take into account that correlation will be negative!!) 
  set.seed(12345)
  min_z_star <- unname(quantile(x = z_star_NC, probs = c(0.025))) # take 2.5% percentile
  random_draw_eps_max <- rnorm(
    n = 50000, 
    mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(min_z_star),
    sd = sqrt(parameters["variance_eps_z"]))
  eps_max <- unname(quantile(x = random_draw_eps_max, probs = c(0.975))) # take 97.5% percentile 
  
  # Minimum 
  set.seed(12345)
  max_z_star <- unname(quantile(x = z_star_NC, probs = c(0.975))) # take 97.5% percentile
  random_draw_eps_min <- rnorm(
    n = 50000, 
    mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(max_z_star),
    sd = sqrt(parameters["variance_eps_z"]))
  eps_min <- max(1e-32,unname(quantile(x = random_draw_eps_min, probs = c(0.025))))
  
  #
  if(eps_min > eps_max){
    print("Warning: eps min > eps max, will set eps max larger")
    eps_max <- 2*eps_min
  }
  
  # create long dense epsilon grid 
  epsilon_grid_dense_long <- (crossing(
    z_star = z_star_grid_dense,
    epsilon = seq(
      from = eps_min,
      to = eps_max, 
      length.out = grid_dense_length)))$epsilon 
  
  ##### Step 1: Get probability distribution over states #####
  
  # Create df
  results_df <- tibble(
    z_star = z_star_grid_dense_long,
    z_tilde = z_tilde_grid_dense_long, 
    epsilon = epsilon_grid_dense_long,
    z_star_marginal_distrib = z_star_grid_dense_marginal_distrib_long, 
    epsilon_condit_distrib = dnorm(
      x = epsilon,
      mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(z_star),
      sd = sqrt(parameters["variance_eps_z"]))
    )
  
  # Get joint density for each state
  results_df <- results_df %>% 
    group_by(z_star) %>% 
    mutate(
      # first normalize within z_star
      epsilon_condit_distrib = epsilon_condit_distrib/sum(epsilon_condit_distrib)
      ) %>% ungroup() %>% 
    mutate(
      # then get joint density: product of marginal & condit distrib  
      joint_density = epsilon_condit_distrib*z_star_marginal_distrib, 
      # and make sure its normalized to 1 
      joint_density = joint_density/sum(joint_density)
    )
  
  ## Next, draw from joint densities to reduce dimensionality 
  
  # draw representative sub-sample from state space 
  set.seed(12345) 
  sampled_entries <- sample(
    x = c(1:nrow(results_df)), 
    size = n_sampling, replace = T, 
    prob = results_df$joint_density)
  
  # restrict results to sampled entries (to reduce dimensionality)
  results_df <- results_df[sampled_entries,]
  
  ##### Step 2: Fill (restricted) results df with optimal choices  #####
  
  # Directly estimate results for restricted results_df sample 
  results_df <- compute_subsidy_DRS_eps_model_grid(
    z_star_grid = results_df$z_star, 
    epsilon_grid = results_df$epsilon, 
    z_tilde_grid = results_df$z_tilde, 
    theta = parameters["theta"])
  
  # fill in rest 
  results_df <- results_df %>% 
    mutate(
      # update based on fixed cost & selection into connections
      choose_C = optimal_profits_C - fixed_cost > optimal_profits_NC,
      optimal_subsidy = ifelse(choose_C,optimal_subsidy,0.0),
      optimal_m_R = ifelse(choose_C, optimal_m_R, 0.0),
      optimal_revenue = ifelse(choose_C, optimal_revenue, revenue_NC_z_star_grid_dense_long[sampled_entries]),
      optimal_profits = ifelse(choose_C, optimal_profits_C - (1-profit_tax)*fixed_cost, optimal_profits_NC),
      TFPQ_model = (1+optimal_subsidy)*z_star
    )
  
  ##### Step 3: Get results, TFPQ & rent-seeking distribution over connected firms #####
  
  # restrict to firms that would choose to become connected 
  connected_df <- results_df[results_df$choose_C,]
  
  # Build quantiles of TFPR distribution of connected firms in data 
  connected_df <- connected_df %>% arrange(TFPQ_model)
  
  quantiles_connected_TFPQ_model <- unname(quantile(
    x = connected_df$TFPQ_model,
    probs = c(1:n_connect)/(n_connect+1) 
  ))
  
  ##### Step 4: Build & return objective #####
  
  # Compute Quantile ratio in Model 
  model_QR <- (compute_baseline_TFPQ_QR(
    data_connect = tibble(
      index = c(1:n_connect),
      TFPQ_resid = quantiles_connected_TFPQ_model), 
    data_nonconnect = results_df %>% rename(TFPQ_resid = z_star), 
    cutoff = 0.0))$TFPQ_QR
  
  # evaluate objective over quantiles of TFPQ
  objective_QR_moment <- sum( (empirical_QR - model_QR)^2 )
  objective_QR_moment_norm <- objective_QR_moment/(sum( (mean(empirical_QR) - empirical_QR)^2 ))
  
  # Also compute implied probability of becoming connected 
  proba_C <- sum(results_df$choose_C)/n_sampling
  
  # return 
  return(list(objective = objective_QR_moment_norm, 
              results_df = results_df, 
              quantiles_model = quantiles_connected_TFPQ_model, 
              proba_C = proba_C
  ))
}

# Compute subsidy with DRS + costs technology (Baseline!)
compute_subsidy_DRS_eps_cost_model_joint <- function(parameters, n_sampling = 10000, 
                                                     z_star_NC = z_star_NC_baseline, z_star_grid_dense = z_star_grid_dense_baseline, 
                                                     z_star_grid_dense_long = z_star_grid_dense_long_baseline, 
                                                     z_tilde_grid_dense_long = z_tilde_grid_dense_long_baseline, 
                                                     z_star_grid_dense_marginal_distrib_long = z_star_grid_dense_marginal_distrib_long_baseline,
                                                     revenue_NC_z_star_grid_dense_long = revenue_NC_z_star_grid_dense_long_baseline,
                                                     elasticity_ratio = elasticity_ratio_baseline, eta_tilde = eta_tilde_baseline, n_connect = nrow_connected,
                                                     empirical_QR = empirical_QR_baseline){
  
  ##### Step 0: Prepare data given parameters #####
  
  print(parameters)
  
  # get fixed cost into right level 
  fixed_cost <- fixed_cost_profit_normalization*parameters["fixed_cost"]
  
  ### get good range for epsilon (simulating min and max)
  
  # Maximum (already take into account that correlation will be negative!!) 
  set.seed(12345)
  min_z_star <- unname(quantile(x = z_star_NC, probs = c(0.025))) # take 2.5% percentile
  random_draw_eps_max <- rnorm(
    n = 50000, 
    mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(min_z_star),
    sd = sqrt(parameters["variance_eps_z"]))
  eps_max <- unname(quantile(x = random_draw_eps_max, probs = c(0.975))) # take 97.5% percentile 
  
  # Minimum 
  set.seed(12345)
  max_z_star <- unname(quantile(x = z_star_NC, probs = c(0.975))) # take 97.5% percentile
  random_draw_eps_min <- rnorm(
    n = 50000, 
    mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(max_z_star),
    sd = sqrt(parameters["variance_eps_z"]))
  eps_min <- max(1e-32,unname(quantile(x = random_draw_eps_min, probs = c(0.025))))
  
  #
  if(eps_min > eps_max){
    print("Warning: eps min > eps max, will set eps max larger")
    eps_max <- 2*eps_min
  }
  
  # create long dense epsilon grid 
  epsilon_grid_dense_long <- (crossing(
    z_star = z_star_grid_dense,
    epsilon = seq(
      from = eps_min,
      to = eps_max, 
      length.out = grid_dense_length)))$epsilon 
  
  ##### Step 1: Get probability distribution over states #####
  
  # Create df
  results_df <- tibble(
    z_star = z_star_grid_dense_long,
    z_tilde = z_tilde_grid_dense_long, 
    epsilon = epsilon_grid_dense_long,
    z_star_marginal_distrib = z_star_grid_dense_marginal_distrib_long, 
    epsilon_condit_distrib = dnorm(
      x = epsilon,
      mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(z_star),
      sd = sqrt(parameters["variance_eps_z"]))
  )
  
  # Get joint density for each state
  results_df <- results_df %>% 
    group_by(z_star) %>% 
    mutate(
      # first normalize within z_star
      epsilon_condit_distrib = epsilon_condit_distrib/sum(epsilon_condit_distrib),
      # make sure that if all entries are 0 within z-star, then set to equal weight, not NaN 
      epsilon_condit_distrib = ifelse(is.nan(epsilon_condit_distrib), 1/n(), epsilon_condit_distrib)
    ) %>% ungroup() %>% 
    mutate(
      # then get joint density: product of marginal & condit distrib  
      joint_density = epsilon_condit_distrib*z_star_marginal_distrib, 
      # and make sure its normalized to 1 
      joint_density = joint_density/sum(joint_density)
    )
  
  ## Next, draw from joint densities to reduce dimensionality 
  
  # draw representative sub-sample from state space 
  set.seed(12345) 
  sampled_entries <- sample(
    x = c(1:nrow(results_df)), 
    size = n_sampling, replace = T, 
    prob = results_df$joint_density)
  
  # restrict results to sampled entries (to reduce dimensionality)
  results_df <- results_df[sampled_entries,]
  
  ##### Step 2: Fill (restricted) results df with optimal choices  #####
  
  # Directly estimate results for restricted results_df sample 
  results_df <- compute_subsidy_DRS_eps_cost_model_grid(
    z_star_grid = results_df$z_star, 
    epsilon_grid = results_df$epsilon, 
    z_tilde_grid = results_df$z_tilde, 
    parameters = parameters, 
    elasticity_ratio = elasticity_ratio, 
    eta_tilde = eta_tilde)
  
  ##### Step 3: Get results, TFPQ & rent-seeking distribution over connected firms #####
  
  # restrict to firms that would choose to become connected 
  connected_df <- results_df[results_df$choose_C,]
  
  # Build quantiles of TFPR distribution of connected firms in data 
  connected_df <- connected_df %>% arrange(TFPQ_model)
  
  quantiles_connected_TFPQ_model <- unname(quantile(
    x = connected_df$TFPQ_model,
    probs = c(1:n_connect)/(n_connect+1) 
  ))
  
  ##### Step 4: Build & return objective #####
  
  # Compute Quantile ratio in Model 
  model_QR <- (compute_baseline_TFPQ_QR(
    data_connect = tibble(
      index = c(1:n_connect),
      TFPQ_resid = quantiles_connected_TFPQ_model), 
    data_nonconnect = results_df %>% rename(TFPQ_resid = z_star), 
    cutoff = 0.0))$TFPQ_QR
  
  # evaluate objective over quantiles of TFPQ
  objective_QR_moment <- sum( (empirical_QR - model_QR)^2 )
  objective_QR_moment_norm <- objective_QR_moment/(sum( (mean(empirical_QR) - empirical_QR)^2 ))
  
  # Also compute implied probability of becoming connected 
  proba_C <- sum(results_df$choose_C)/n_sampling
  
  # return 
  return(list(objective = objective_QR_moment_norm, 
              results_df = results_df, 
              quantiles_model = quantiles_connected_TFPQ_model, 
              proba_C = proba_C
  ))
}

#### Find optimal DRS parameters #### 

# start with initial guess 
guess_param_DRS <- c(
  "fixed_cost" = 0.0,
  "theta" = 1 - eta_tilde_baseline, # (limit is 1-eta_tilde)
  "alpha_eps" = 0.185, 
  "beta_eps" = -0.05, 
  "variance_eps_z" = 0.0001125)

# check that get results given guess 
test_DRS_eps <- compute_subsidy_DRS_eps_model_joint(parameters = guess_param_DRS, n_sampling = 10000)

## Objective works! 
test_DRS_eps$objective

# optimal subsidies
summary(test_DRS_eps$results_df$optimal_subsidy)

# test that marginal distribution of z* is indeed the same!! 
ggplot() + 
  geom_density(aes(x = log(test_DRS_eps$results_df$z_star), color = "Model")) + 
  geom_density(aes(x = log(z_star_NC_baseline), color = "Data"))

## Solve for optimal parameters (previous grid search showed that higher theta is better, so take maximum allowed by theory = 1 - eta_tilde. Need to add a bit of gap, otherwise not stable)

# get initial parameter guess 
initial_parameter_DRS_eps <- c(
  "alpha_eps" = 0.185, 
  "beta_eps" = -0.05,
  "variance_eps_z" = 0.0001125,
  "theta" = 0.155)

### Find parameters using gradient-based "L-BFGS-B" (box-constrained Newton method)

## Start optim parallel (to run in parallel)
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_DRS_eps <- optimParallel( 
  par = initial_parameter_DRS_eps/initial_parameter_DRS_eps, 
  fn = function(par){
    parameters <- par*initial_parameter_DRS_eps
    
    # check parameter combinations 
    if(mean(parameters["alpha_eps"] + parameters["beta_eps"]*log(z_star_NC_baseline)) < 0){
      return(1e12*(1/parameters["alpha_eps"]))
    } else{
      results <- compute_subsidy_DRS_eps_model_joint(
        parameters = c(parameters,"fixed_cost" = 0.0,
                       "theta" = 0.155),
        n_sampling = 5000)
      print(paste("Objective is: ", results$objective))
      return(results$objective)
    }
  },
  method = "L-BFGS-B",
  lower = c(0.5,0.5,0.5,0.9), 
  upper = c(2.0,2.0,2.0,0.158/0.155), 
  control = list(factr = 1e8) 
)

stopCluster(cl) 

# check results 
optimal_DRS_eps$par*initial_parameter_DRS_eps
optimal_DRS_eps$value # R2 = 0.496%, obj = 0.504
optimal_DRS_eps$message # converged! 

# implied rho? -0.976 (also: almost perfect negative correlation) 
ratio_beta_sigma_DRS <- unname(((optimal_DRS_eps$par*initial_parameter_DRS_eps)["beta_eps"])/sqrt((optimal_DRS_eps$par*initial_parameter_DRS_eps)["variance_eps_z"]))
ratio_beta_sigma_DRS/sqrt((ratio_beta_sigma_DRS)^2 + 1)

# now find these results 
results_optimal_DRS_eps <- compute_subsidy_DRS_eps_model_joint(
  parameters = c(optimal_DRS_eps$par*initial_parameter_DRS_eps,"fixed_cost" = 0.0), 
  n_sampling = 50000)

ggplot() + 
  geom_line(aes(x = c(1:nrow_connected)/(nrow_connected+1), 
                y = quantiles_connected_TFPQ_data_baseline, 
                color = "Data")) + 
  geom_line(aes(x = c(1:nrow_connected)/(nrow_connected+1), 
                y = results_optimal_DRS_eps$quantiles_model, 
                color = "Model (test)")) +
  ylab("TFPQ (connected)") + 
  xlab("Quantile")

ggplot() + 
  geom_density(aes(x = quantiles_connected_TFPQ_data_baseline, fill = "Data"), alpha = 0.5) + 
  geom_density(aes(x = results_optimal_DRS_eps$quantiles_model, fill = "Model"), alpha = 0.5) + 
  theme_classic() + 
  ylab("Density") + 
  xlab("TFPQ")

# compute TFPQ quantile ratio in model 
model_DRS_TFPQ_QR_noF <- compute_baseline_TFPQ_QR(
  data_connect = tibble(
    index = c(1:241),
    TFPQ_resid = results_optimal_DRS_eps$quantiles_model), 
  data_nonconnect = results_optimal_DRS_eps$results_df %>% 
    rename(TFPQ_resid = z_star), 
  cutoff = 0.0)

# Plot estimated TFPQ QR schedule  
ggplot() + 
  geom_line(data = CI_baseline_TFPQ_QR_bs_upper, aes(x = productivity_order, y = TFPQ_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_point(aes(x = c(1:241), y = baseline_TFPQ_QR_noF$TFPQ_QR, color = "Data"), shape = 4) + 
  geom_line(aes(x = c(1:241), y = model_DRS_TFPQ_QR_noF$TFPQ_QR, color = "Model"), linewidth = 1.0) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "TFPQ Rank") + 
  scale_color_discrete(name = "Type:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom")

#### Find optimal DRS+convex parameters #### 

# start with initial guess 
guess_param_DRS_convex <- c(
  "fixed_cost" = 0.0, 
  "theta" = 0.2, 
  "alpha_eps" = 0.041, 
  "beta_eps" = -0.011, 
  "variance_eps_z" = 4.656e-07, 
  "cost_level" = 1e-08, 
  "cost_elasticity" = 1.04)

# check that get results given guess 
test_DRS_eps_cost <- compute_subsidy_DRS_eps_cost_model_joint(parameters = guess_param_DRS_convex, n_sampling = 10000)

## Objective seems to work! 
test_DRS_eps_cost$objective

# optimal subsidies
summary(test_DRS_eps_cost$results_df$optimal_subsidy)

# test that marginal distribution of z* is indeed the same!! 
ggplot() + 
  geom_density(aes(x = log(test_DRS_eps_cost$results_df$z_star), color = "Model")) + 
  geom_density(aes(x = log(z_star_NC_baseline), color = "Data"))

### Algorithm: Gradient-based method, but fix elasticities (grid-search showed that these give the best fit) 

# then solve for optimal parameters 
initial_parameter_DRS_eps_cost <- c(
  "alpha_eps" = 0.041, 
  "beta_eps" = -0.011, 
  "variance_eps_z" = 4.656e-07, 
  "cost_level" = 1e-08, 
  "theta" = 0.2,
  "cost_elasticity" = 1.04)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_DRS_eps_cost <- optimParallel( 
  par = initial_parameter_DRS_eps_cost/initial_parameter_DRS_eps_cost, 
  fn = function(par){
    parameters <- par*initial_parameter_DRS_eps_cost
    
    # check parameter combinations 
    if(mean(parameters["alpha_eps"] + parameters["beta_eps"]*log(z_star_NC_baseline)) < 0.01){
      return(1e12*(1/parameters["alpha_eps"]))
    } else{
      results <- compute_subsidy_DRS_eps_cost_model_joint(
        parameters = c(parameters,"fixed_cost" = 0.0), 
        n_sampling = 10000)
      print(paste("Objective is: ", results$objective))
      return(results$objective)
    }
  },
  method = "L-BFGS-B",
  lower = c(0.5,0.5,0.5,0.5,1.0,1/1.04), 
  upper = c(2.0,2.0,2.0,2.0,1.0,1.1) 
)

stopCluster(cl) 

optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost
optimal_DRS_eps_cost$value # R2 = 0.855, obj = 0.145
optimal_DRS_eps_cost$message # Converged! 

### Solve for remaining parameters ### 

# solve for correlation rho: -0.999
ratio_beta_sigma <- ((optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)["beta_eps"])/sqrt((optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)["variance_eps_z"])
baseline_estimate_rho <- ratio_beta_sigma/sqrt((ratio_beta_sigma)^2 + 1)

# save parameters 
baseline_parameters <- c(
  optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost,
  "fixed_cost" = 0.0, 
  "proba_c" = nrow_connected/(nrow_connected+nrow_non_connected), 
  "rho" = unname(baseline_estimate_rho)
  )

# also save as dataframe (to read in Julia)
baseline_parameters_df <- tibble(
  "alpha_eps" = (optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)[[1]],
  "beta_eps" = (optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)[[2]],
  "variance_eps_z" = (optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)[[3]],
  "cost_level" = (optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)[[4]],
  "fixed_cost" = 0.0, 
  "theta" = (optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)[[5]], 
  "cost_elasticity" = (optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)[[6]], 
  "proba_c" = nrow_connected/(nrow_connected+nrow_non_connected), 
  "rho" = unname(baseline_estimate_rho)
)
write_csv(x = baseline_parameters_df, file = "Computation/data/baseline_parameters.csv")

# now find these results 
results_optimal_DRS_eps_cost <- compute_subsidy_DRS_eps_cost_model_joint(
  parameters = c(optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost,
                 "fixed_cost" = 0.0), 
  n_sampling = 50000)

ggplot() + 
  geom_line(aes(x = c(1:nrow_connected)/(nrow_connected+1), 
                y = quantiles_connected_TFPQ_data_baseline, 
                color = "Data")) + 
  geom_line(aes(x = c(1:nrow_connected)/(nrow_connected+1), 
                y = results_optimal_DRS_eps_cost$quantiles_model, 
                color = "Model (test)")) +
  ylab("TFPQ (connected)") + 
  xlab("Quantile")

ggplot() + 
  geom_density(aes(x = quantiles_connected_TFPQ_data_baseline, fill = "Data"), alpha = 0.5) + 
  geom_density(aes(x = results_optimal_DRS_eps_cost$quantiles_model, fill = "Model"), alpha = 0.5) + 
  theme_classic() + 
  ylab("Density") + 
  xlab("TFPQ")

# compute TFPQ quantile ratio in model 
model_DRS_convex_TFPQ_QR_noF <- compute_baseline_TFPQ_QR(
  data_connect = tibble(
    index = c(1:241),
    TFPQ_resid = results_optimal_DRS_eps_cost$quantiles_model), 
  data_nonconnect = results_optimal_DRS_eps_cost$results_df %>% 
    rename(TFPQ_resid = z_star), 
  cutoff = 0.0)

# Plot estimated TFPQ QR schedule  
ggplot() + 
  geom_line(data = CI_baseline_TFPQ_QR_bs_upper, aes(x = productivity_order, y = TFPQ_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_point(aes(x = c(1:241), y = baseline_TFPQ_QR_noF$TFPQ_QR, color = "Data"), shape = 4) + 
  geom_line(aes(x = c(1:241), y = model_DRS_convex_TFPQ_QR_noF$TFPQ_QR, color = "Model"), linewidth = 1.0) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "TFPQ Rank") + 
  scale_color_discrete(name = "Type:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom")

## Check implied (differential) intermediate input share 
model_implied_interm_inputs <- results_optimal_DRS_eps_cost$results_df %>% 
  mutate(
    intermediate_share = (optimal_m_R + gamma_gross*optimal_revenue)/optimal_revenue, 
    diff_intermediate_share = optimal_m_R/optimal_revenue
  ) 
mean(model_implied_interm_inputs$diff_intermediate_share) # 4pp

#### Plot results ####

# get data moments 
main_results_df <- firms_1997 %>% select(TFPQ_resid,connected) %>% 
  mutate(Type = "Data") %>% 
  rename(TFPQ = TFPQ_resid)

# add model moments
main_results_df <- rbind(
  main_results_df, 
  results_optimal_DRS_eps_cost$results_df %>% 
    filter(choose_C) %>% 
    select(TFPQ_model) %>% 
    mutate(connected = "connected", Type = "Model") %>% 
    rename(TFPQ = TFPQ_model),
  results_optimal_DRS_eps_cost$results_df %>% 
    select(z_star) %>% 
    mutate(connected = "non-connected", Type = "Model") %>% 
    rename(TFPQ = z_star)
)

## Figure 3 (left panel): TFPQ distributions ##

plot_TFPQ_distrib_model <- ggplot(
  main_results_df %>% 
    mutate(connected = case_when(
      connected == "connected" ~ "C", 
      connected == "non-connected" ~ "NC"
    )),  
  aes(x = log(TFPQ), y = connected, color = Type, fill = Type)) +
  geom_density_ridges(
    alpha = 0.6, color = "black", quantile_lines = TRUE, quantiles = 2,
    show.legend = TRUE, bandwidth = 0.075) +
  scale_y_discrete(name = "") +
  scale_fill_manual(values = c("#e74c3c","#3498db"), labels = c("Data", "Model (Baseline)"), name = "Type:") + # "#F9918A","#33CCD0" c("#D55E0050", "#0072B250")
  theme_classic(base_size = 16, base_family = "LM Roman 10") +
  theme(legend.position = "bottom") +
  xlab("log TFPQ")

## Figure 3 (right panel): TFPQ QR schedule ##

# Plot estimated TFPQ QR schedule  
plot_TFPQ_QR_model <- ggplot() + 
  geom_line(data = CI_baseline_TFPQ_QR_bs_upper, aes(x = (productivity_order/241)*100, y = TFPQ_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_point(aes(x = (c(1:241)/241)*100, y = baseline_TFPQ_QR_noF$TFPQ_QR, color = "Data"), shape = 4) + 
  geom_line(aes(x = (c(1:241)/241)*100, y = model_DRS_TFPQ_QR_noF$TFPQ_QR, color = "Model (DRS)"), linewidth = 0.5) + 
  geom_line(aes(x = (c(1:241)/241)*100, y = model_DRS_convex_TFPQ_QR_noF$TFPQ_QR, color = "Model (Baseline)"), linewidth = 1.2) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "TFPQ Percentiles") + 
  scale_color_manual(values = c("#e74c3c","#3498db","#6A6D6D"), name = "Type:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom")

ggarrange(
  plot_TFPQ_distrib_model + labs(caption="(A) TFPQ distributions C vs NC ") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_TFPQ_QR_model + labs(caption="(B) TFPQ quantile ratio") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  
ggsave(width = 12, height = 5, dpi = 200,
       "Paper writing/Revision JPE Macro/Final paper/Publication Figures/Figure3.pdf") 

################ Baseline results Productivity Process ################

#### Data moments ####

# In data: select non-connected firms in 1997 that survive until 1998. 
# Get their z* (pre) and z* (post)
# In data, run regression of log(TFPQ post) on log(TFPQ pre) & collect regression coefficients 
lm_prod_process <- lm(formula = log_z_star_next_NC_survivors ~ log_z_star_NC_survivors)

# Moments are 2 coefficients & variance 
lm_prod_process$coefficients
var(lm_prod_process$residuals)

#### Model & solve for optimal parameters ####

# Write objective function for estimating productivity process 
objective_productivity_process <- function(
    productivity_param,
    parameters = baseline_parameters){
  
  ##### Step 0: Prepare data given parameters (can move this out!) #####
  
  print(productivity_param)
  
  # extract parameters
  rho_z <- productivity_param["rho_z"]
  mean_zeta_star <- productivity_param["mean_zeta_star"]
  var_zeta_star <- productivity_param["var_zeta_star"]
  
  ##### Step 1: Update marginal productivity distribution of survivors given productivity_param #####
  
  # draw new log_z_star (could also have many more rnorm draws!) 
  set.seed(12345)
  log_z_star_new <- rho_z*log_z_star_NC_survivors + rnorm(n = length(log_z_star_NC_survivors), mean = mean_zeta_star, sd = sqrt(var_zeta_star))
  
  ##### Step 2: Draw new epsilon based on new z_star #####
  
  # Draw new epsilon based on new z_star 
  set.seed(12345)
  new_epsilon <- rnorm(n = length(log_z_star_NC_survivors), 
                       mean = parameters["alpha_eps"] + parameters["beta_eps"]*log_z_star_new, 
                       sd = sqrt(parameters["variance_eps_z"])
                       )
  
  ##### Step 3: Solve for optimal policies/subsidies ##### 
  
  results_df <- compute_subsidy_DRS_eps_cost_model_grid(
    z_star_grid = exp(log_z_star_new), 
    epsilon_grid = new_epsilon, 
    z_tilde_grid = z_tilde_baseline(z_star = exp(log_z_star_new)), 
    parameters = parameters)
  
  ##### Step 4: Construct objective ##### 
  
  # create regression dataset
  reg_df <- rbind(
    # first dataset only non-connected 
    results_df %>% select(z_star) %>% 
      mutate(weight = 1 - unname(parameters["proba_c"]),
             TFPQ_prev = exp(log_z_star_NC_survivors)) %>%  
      rename(TFPQ = z_star), 
    # second dataset only connected 
    results_df %>% select(TFPQ_model) %>% 
      mutate(weight = unname(parameters["proba_c"]), 
             TFPQ_prev = exp(log_z_star_NC_survivors)) %>%  
      rename(TFPQ = TFPQ_model)
  )
  
  # Now run regression 
  main_regression <- lm(formula = log(TFPQ) ~ log(TFPQ_prev), data = reg_df, weights = reg_df$weight)
  
  # construct objective 
  error_constant <- abs(main_regression$coefficients[[1]] - lm_prod_process$coefficients[[1]])/lm_prod_process$coefficients[[1]]
  error_rho <- abs(main_regression$coefficients[[2]] - lm_prod_process$coefficients[[2]])/lm_prod_process$coefficients[[2]]
  error_var <- abs(matrixStats::weightedVar(x = main_regression$residuals, w = reg_df$weight) - var(lm_prod_process$residuals))/var(lm_prod_process$residuals)
  
  # full objective is sum of errors 
  objective <- sum(error_constant^2 + error_rho^2 + error_var^2)
  
  # return 
  return(list(objective = objective, 
              moment1 = main_regression$coefficients[[1]], 
              moment2 = main_regression$coefficients[[2]], 
              moment3 = matrixStats::weightedVar(x = main_regression$residuals, w = reg_df$weight)))
}

initial_param_guess_prod_process <- c(
  "rho_z" = lm_prod_process$coefficients[[2]], 
  "mean_zeta_star" = lm_prod_process$coefficients[[1]], 
  "var_zeta_star" = var(lm_prod_process$residuals)
) 

# test 
objective_productivity_process(productivity_param = initial_param_guess_prod_process)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_param_prod_process <- optimParallel( 
  par = initial_param_guess_prod_process, 
  fn = function(par){
    result <- objective_productivity_process(productivity_param = par)
    print(paste("Objective is: ", result$objective))
    return(result$objective)
  },
  method = "L-BFGS-B",
  lower = c(0.9,0.06,0.01), 
  upper = c(1.0,0.15,0.03)
)

stopCluster(cl) 

optimal_param_prod_process$par
optimal_param_prod_process$value # objective < 1e-11
optimal_param_prod_process$message # converged 

# solve these 
optimal_param_prod_process_results <- objective_productivity_process(productivity_param = optimal_param_prod_process$par)

# Use these to add to paper & Julia code 

################ Baseline results Exit/Entry processes ################

#### Step 1: Discretize z and get transition matrix over z ####

# from Tauchen -- could also use Rouwenhorst!
library(Rtauchen)

# get transition matrix 
transition_z <- Rtauchen(
  ne = 1000, 
  ssigma_eps = sqrt(optimal_param_prod_process$par[[3]]), 
  llambda_eps = optimal_param_prod_process$par[[1]], 
  m = 2.5)
write_csv(x = as_tibble(transition_z), file = "Computation/data/transition_matrix_z.csv")

# get corresponding grid (and make sure mean is correct!) 
mean_uncond_log_z_star <- (optimal_param_prod_process$par[[2]]/(1-optimal_param_prod_process$par[[1]]))
log_z_star_grid_EE <- as.vector(Tgrid(
  ne = 1000, 
  ssigma_eps = sqrt(optimal_param_prod_process$par[[3]]), 
  llambda_eps = optimal_param_prod_process$par[[1]], 
  m = 2.5) + mean_uncond_log_z_star)

z_star_grid_EE <- exp(log_z_star_grid_EE)
write_csv(x = tibble(z_star = z_star_grid_EE), file = "Computation/data/z_star_grid.csv")

# test that this gives correct numbers: looks good! 

test_prod_process <- tibble(log_z_star = log_z_star_grid_EE,
                            log_z_star_next = NA)
for(row in c(1:nrow(test_prod_process))){
  test_prod_process$log_z_star_next[row] <- sample(x = log_z_star_grid_EE, size = 1, replace = T, prob = transition_z[row,])
}

test_lm_prod_process <- lm(data = test_prod_process, formula = log_z_star_next ~ log_z_star)
summary(test_lm_prod_process) # coefficients in line with mean & persistence 
sd(test_lm_prod_process$residuals) # sd of residual also in line
rm(test_lm_prod_process)
rm(test_prod_process)

#### Step 2: For each z, solve for optimal profits including for grid over epsilon ####

# create array to save (expected) profits conditional on z (both for NC & C)
profits_grid_EE <- tibble(
  z_star = z_star_grid_EE,
  z_tilde = z_tilde_baseline(z_star = z_star_grid_EE), 
  profits_C_expected = rep(NA, length(z_star_grid_EE)),
  profits_NC = rep(NA, length(z_star_grid_EE))
)

get_expected_profits_C_EE <- function(row, n_eps){ 
  
  ### Substep 1: Draw epsilon conditional on z_star & the probability 
  set.seed(12345)
  epsilon <- rnorm(
    n = n_eps, 
    mean = baseline_parameters[["alpha_eps"]] + baseline_parameters[["beta_eps"]]*log_z_star_grid_EE[row], 
    sd = sqrt(baseline_parameters[["variance_eps_z"]])
  )
  
  proba_epsilon <- dnorm(x = epsilon, 
        mean = baseline_parameters[["alpha_eps"]] + baseline_parameters[["beta_eps"]]*log_z_star_grid_EE[row], 
        sd = sqrt(baseline_parameters[["variance_eps_z"]]))
  
  # normalize probability to make sure it sums to 1  
  proba_epsilon <- proba_epsilon/sum(proba_epsilon)
  
  ### Substep 2: Get optimal profits 
  
  results_df <- compute_subsidy_DRS_eps_cost_model_grid(
    z_star_grid = rep(z_star_grid_EE[row], n_eps), 
    epsilon_grid = epsilon, 
    z_tilde_grid = rep(profits_grid_EE$z_tilde[row], n_eps),
    parameters = baseline_parameters)
  
  # get expected profits by taking weighted mean across all possible epsilon 
  expected_profits_C <- sum(results_df$optimal_profits_C*proba_epsilon)
  expected_profits_NC <- sum(results_df$optimal_profits_NC*proba_epsilon)
  
  # collect further objects needed for later 
  expected_m_R_C <- sum(results_df$optimal_m_R*proba_epsilon)
  expected_subsidies_C <- sum((results_df$optimal_revenue)*(results_df$optimal_subsidy/(1 + results_df$optimal_subsidy))*proba_epsilon)
  # add VAT revenue here 
  # add profit tax revenue here 
  expected_revenue_output_C <- sum(results_df$revenue_output_C*proba_epsilon)
  expected_revenue_output_NC <- sum(results_df$revenue_output_NC*proba_epsilon)
  expected_subsidy_rate_C <- sum(results_df$optimal_subsidy*proba_epsilon)
  expected_revenue_C <- sum(results_df$optimal_revenue*proba_epsilon)
  expected_revenue_NC <- sum(results_df$z_tilde*proba_epsilon)
  
  return(c("expected_profits_C" = expected_profits_C, 
           "expected_profits_NC" = expected_profits_NC, 
           "expected_m_R_C"= expected_m_R_C,
           "expected_subsidies_C" = expected_subsidies_C,
           "expected_revenue_output_C" = expected_revenue_output_C, 
           "expected_revenue_output_NC" = expected_revenue_output_NC, 
           "expected_subsidy_rate_C" = expected_subsidy_rate_C,
           "expected_revenue_C" = expected_revenue_C, 
           "expected_revenue_NC" = expected_revenue_NC))
}

# compute profits (takes a moment!) & other optimal choices 
profits_matrix_EE <- sapply(X = c(1:length(z_star_grid_EE)), FUN = function(x){get_expected_profits_C_EE(row = x, n_eps = 100)})
profits_grid_EE$profits_C_expected <- profits_matrix_EE[1,]
profits_grid_EE$profits_NC <- profits_matrix_EE[2,]
profits_grid_EE$profits_expected <- baseline_parameters[["proba_c"]]*profits_grid_EE$profits_C_expected + (1-baseline_parameters[["proba_c"]])*profits_grid_EE$profits_NC
profits_grid_EE$rent_seeking_C_expected <- profits_matrix_EE[3,]
profits_grid_EE$subsidies_expected <- baseline_parameters[["proba_c"]]*profits_matrix_EE[4,]
profits_grid_EE$revenue_output_expected <- baseline_parameters[["proba_c"]]*profits_matrix_EE[5,] + (1-baseline_parameters[["proba_c"]])*profits_matrix_EE[6,]
profits_grid_EE$revenue_C_expected <- profits_matrix_EE[8,]
profits_grid_EE$revenue_NC <- profits_matrix_EE[9,]
profits_grid_EE$revenue_expected <- baseline_parameters[["proba_c"]]*profits_grid_EE$revenue_C_expected + (1-baseline_parameters[["proba_c"]])*profits_grid_EE$revenue_NC
profits_grid_EE$subsidy_rate_C_expected <- profits_matrix_EE[7,]

# can export this 
write_csv(x = profits_grid_EE, file = "Computation/data/profits_grid_baseline.csv")

#### Step 3: VFI: Solve expected VF over z conditional on guess for Gumbel parameters ####

# VFI function 
VFI_EE <- function(guess_exp_VF,param_EE,crit = 1e-6,verbose=T){
  
  ### Step 0: Prepare objects 
  
  # get parameters 
  level_fcost <- param_EE[["level_fcost"]]
  scale_fcost <- param_EE[["scale_fcost"]]
  
  # initialize objects 
  exp_VF_new <- guess_exp_VF
  exp_profits <- profits_grid_EE$profits_expected
  
  # create function to compute expected fixed costs 
  compute_exp_fcost <- function(exit_proba,exp_VF){
    
    # make sure that gamma incomplete is robust regarding "too small" values 
    gamma_incomplete <- case_when(
      # exit_proba too close to 1 
      -log(exit_proba) < 1e-250 ~ expint::gammainc(a = 0,x = 1e-250),
      # exit_proba too close to 0 
      exit_proba < 1e-250 ~ 0.0, 
      .default = expint::gammainc(a = 0,x = -log(exit_proba))
      )
    
    # compute expected fixed cost 
    exp_fcost <- beta*exp_VF*exit_proba - scale_fcost*gamma_incomplete
    
    return(exp_fcost)
  }
  
  ### Step 1: While loop
  iteration <- 1 
  VF_diff <- Inf 
  
  while (VF_diff > crit) {
    
    # print progress
    if(verbose){
      print(paste("Iteration",iteration,"with difference:",VF_diff))
    }
    
    # get exit probability given new guess 
    exit_proba <- 1 - exp(-exp(-(beta*exp_VF_new - level_fcost)/scale_fcost))
    exp_fcost <- compute_exp_fcost(exit_proba = exit_proba, exp_VF = exp_VF_new)
    
    # compute new VF (over all z)
    VF_new <- exp_profits + (1-exit_proba)*( -exp_fcost + beta*exp_VF_new )
    
    # save old results 
    exp_VF_old <- exp_VF_new
    
    # compute new expected VF 
    exp_VF_new <- transition_z %*% VF_new
    
    # update iteration & compute difference 
    iteration <- iteration + 1 
    VF_diff <- max(abs(exp_VF_old - exp_VF_new))
    
  }
  
  # update results
  exit_proba <- 1 - exp(-exp(-(beta*exp_VF_new - level_fcost)/scale_fcost))
  exp_fcost <- compute_exp_fcost(exit_proba = exit_proba, exp_VF = exp_VF_new)
  VF <- exp_profits + (1-exit_proba)*( -exp_fcost + beta*exp_VF_new )
  
  # Return results 
  return(list(exp_VF = exp_VF_new, exit_proba = exit_proba, VF = VF, exp_fcost = exp_fcost))
}

# Test it 
test_VFI_EE <- VFI_EE(
  guess_exp_VF = (1/(1-beta))*profits_grid_EE$profits_NC, 
  param_EE = c("level_fcost" = 2500000, "scale_fcost" = 10000))

summary(test_VFI_EE$exit_proba)

#### Step 4: Construct objective function ####

## First: Need to create data moment: exit proba over z
firms_1997 <- left_join(x = firms_1997, y = firms_exit %>% select(PSID,exit_rob))
reg_firms_NC_exit <- lm(data = firms_1997 %>% filter(connected == "non-connected"),
                        formula = exit_rob ~ log(TFPQ_resid))
summary(reg_firms_NC_exit)

# residualize exit
OLS_resid_exit <- fixest::feols(
  fml = exit_rob ~ 1 | prov + state_owned_major + state_owned_partly + estyear_FE + industry_4digit, #
  data = firms_1997) 

# get residualized exit measure 
firms_1997$exit_resid <- firms_1997$exit_rob - predict(object = OLS_resid_exit, newdata = firms_1997) + mean(firms_1997$exit_rob)

# Use this regression as target!! 
reg_firms_NC_exit_resid <- lm(
  data = firms_1997 %>% filter(connected == "non-connected"),
  formula = exit_resid ~ log(TFPQ_resid))
summary(reg_firms_NC_exit_resid)

## Second: Find distribution of NC in 1997 over z_grid 
closest_grid_points_z_star_EE <- sapply(X = z_star_NC_baseline, FUN = function(x){which.min(abs(x - z_star_grid_EE))})
empirical_distrib_z_star_NC_EE <- tibble(z_star_grid_point = closest_grid_points_z_star_EE) %>% 
  group_by(z_star_grid_point) %>% 
  summarise(share = n()/length(closest_grid_points_z_star_EE))
empirical_distrib_z_star_NC_EE <- left_join(
  x = tibble(z_star_grid_point = c(1:length(z_star_grid_EE)), 
             z_star = z_star_grid_EE), 
  y = empirical_distrib_z_star_NC_EE) 
empirical_distrib_z_star_NC_EE$share <- ifelse(is.na(empirical_distrib_z_star_NC_EE$share), 0.0, empirical_distrib_z_star_NC_EE$share)

## Third: Construct objective 
objective_exit_process <- function(param_EE){
  
  print(param_EE)
  
  # Step 1: Solve for VF given param_EE 
  results_VF <- VFI_EE(
    guess_exp_VF = (1/(1-beta))*profits_grid_EE$profits_NC, 
    param_EE = param_EE, verbose = F) 
  
  # Step 2: Run regression of exit proba on log(z_star)
  model_data <- empirical_distrib_z_star_NC_EE 
  model_data$exit_proba <- results_VF$exit_proba[,1]
  reg_model_firms_NC_exit <- lm(
    data = model_data,
    formula = exit_proba ~ log(z_star), 
    weights = share)
  
  # Step 3: Construct objective 
  moment_coef1 <- (reg_model_firms_NC_exit$coefficients[[1]] - reg_firms_NC_exit_resid$coefficients[[1]])/reg_firms_NC_exit_resid$coefficients[[1]]
  moment_coef2 <- (reg_model_firms_NC_exit$coefficients[[2]] - reg_firms_NC_exit_resid$coefficients[[2]])/reg_firms_NC_exit_resid$coefficients[[2]]
  
  print(paste("Constant is:",reg_model_firms_NC_exit$coefficients[[1]], 
              ". Slope is:", reg_model_firms_NC_exit$coefficients[[2]]))
  
  objective <- moment_coef1^2 + moment_coef2^2 
  return(list(objective = objective, results_VF = results_VF))
}

#### Step 5: Find optimal exit parameters ####

# start with good initial guess 
initial_param_guess_exit_process <- c("level_fcost" = -52300000*1.5, "scale_fcost" = 2.5e7*1.5)
objective_exit_process(param_EE = initial_param_guess_exit_process)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_param_exit_process <- optimParallel( 
  par = initial_param_guess_exit_process/initial_param_guess_exit_process, 
  fn = function(par){
    parameter <- par*initial_param_guess_exit_process
    results <- objective_exit_process(param_EE = parameter)
    print(paste("Objective is: ", results$objective))
    return(results$objective)
  },
  method = "L-BFGS-B",
  lower = c(0.5,0.5), 
  upper = c(1.2,1.2)
)

stopCluster(cl) 

optimal_param_exit_process$par*initial_param_guess_exit_process
optimal_param_exit_process$value # objective < 1e-8
optimal_param_exit_process$message # converged

# solve these 
optimal_param_exit_process_results <- objective_exit_process(param_EE = optimal_param_exit_process$par*initial_param_guess_exit_process)

# look at interquartile range of exit probabilities among producing NC 
optimal_param_exit_process_results$results_VF$exit_proba[which.min(abs(quantile(z_star_NC_baseline, probs=0.25) - z_star_grid_EE)),1] # 1st quartile TFPQ NC
optimal_param_exit_process_results$results_VF$exit_proba[which.min(abs(quantile(z_star_NC_baseline, probs=0.75) - z_star_grid_EE)),1] # 3rd quartile TFPQ NC

## Add these to main paper
fixed_cost_param <- tibble(
  level_fcost = (optimal_param_exit_process$par*initial_param_guess_exit_process)[[1]],
  scale_fcost = (optimal_param_exit_process$par*initial_param_guess_exit_process)[[2]],
)
write_csv(x = fixed_cost_param, file = "Computation/data/fixed_cost_param.csv") 

#### Step 6: Find entry cost in line with zero profit condition ####

### to get unconditional expected value function, need unconditional distrib over z*

# draw from unconditional distribution 
z_star_uncond_distrib_EE <- exp(rnorm(n = 100000, 
                                      mean = optimal_param_prod_process$par[[2]]/(1-optimal_param_prod_process$par[[1]]), 
                                      sd = sqrt(optimal_param_prod_process$par[[3]]/(1-optimal_param_prod_process$par[[1]]^2))
                                      ))

# put this distribution on the grid 
closest_grid_points_z_star_uncond_distrib_EE <- sapply(X = z_star_uncond_distrib_EE, FUN = function(x){which.min(abs(x - z_star_grid_EE))})
z_star_uncond_distrib_EE_grid <- tibble(z_star_grid_point = closest_grid_points_z_star_uncond_distrib_EE) %>% 
  group_by(z_star_grid_point) %>% 
  summarise(share = n()/length(closest_grid_points_z_star_uncond_distrib_EE))
# as long as all points given, then can proceed with this (otherwise, make sure all grid points given)
write_csv(x = z_star_uncond_distrib_EE_grid, file = "Computation/data/z_star_uncond_distrib.csv")

# compute expected entry value 
expected_entry_value <- sum(optimal_param_exit_process_results$results_VF$VF[,1]*z_star_uncond_distrib_EE_grid$share)

# How big is this? 
expected_entry_value/mean(firms_1997$routs) # about 14% of avg firm revenue 
expected_entry_value/9040 # in 000' USD: about 1.1mio USD (fairly large?) 

################ Baseline GE (to back out aggregate labor supply) ################

#### Find stationary distribution over z* #### 

# function to compute baseline stationary distribution 
find_baseline_stationary_distrib <- function(initial_distrib, exit_proba, crit = 1e-16, max_iter = 1000, verbose = F){
  
  # initialize objects 
  diff <- Inf 
  iter <- 1
  prev_distrib <- initial_distrib
  entry_distrib <- initial_distrib # assumes that initial distrib is unconditional one! 
  
  # start loop of iterating on distribution 
  while (diff > crit & iter < max_iter) {
    
    if(verbose){
      print(paste("Iteration",iter,"with difference:",diff))
    }
    
    # Step 1: Compute survivors from last period 
    survivor_distrib <- prev_distrib*(1-exit_proba)
    mass_exit <- sum(prev_distrib*exit_proba) # will add this back later! 
    
    # Step 2: Update their productivity 
    survivor_distrib_new <- (survivor_distrib %*% transition_z)[1,]
    
    # Step 3: Add new entrants (come from unconditional distribution)
    entrant_distrib <- entry_distrib*mass_exit # ensure that entry and exit always balance 
    
    # Step 4: put all together 
    new_distrib <- survivor_distrib_new + entrant_distrib
    
    # Step 5: update
    diff <- max(abs(new_distrib - prev_distrib))
    iter <- iter + 1 
    prev_distrib <- new_distrib
    
  }
  
  # return SS distribution 
  return(new_distrib)
}

# find stationary distribution 
baseline_SS_distribution <- find_baseline_stationary_distrib(
  initial_distrib = z_star_uncond_distrib_EE_grid$share, 
  exit_proba = optimal_param_exit_process_results$results_VF$exit_proba[,1], 
  verbose = T) 
write_csv(x = tibble(share = baseline_SS_distribution), file = "Computation/data/baseline_SS_distribution.csv")

## Validation: Check TFPQ distribution of NC in data vs. SS in model 
tibble(z_star = z_star_grid_EE, 
       Model = baseline_SS_distribution, 
       Data = empirical_distrib_z_star_NC_EE$share) %>% 
  pivot_longer(cols = c(Model,Data), names_to = "Type", values_to = "Density") %>%
  filter(z_star <= 35 & z_star >= 5) %>% 
  ggplot(aes(x = z_star, y = Type, height = Density, group = Type, fill = Type)) + 
  geom_density_ridges(stat = "identity", scale = 2.5)
## Fit is not great!! Data features much less low-productive firms; potentially in line with more selection on entry, which is not modelled here. 

#### Use stationary distribution over z* to compute all aggregates #### 

optimal_param_exit_process_results$results_VF$exit_proba[,1]

compute_baseline_aggregates <- function(SS_distrib,exit_proba){
  
  # Get total (productive) inputs 
  productive_capital <- sum((1-vat_tax)*profits_grid_EE$revenue_expected*alpha_gross*SS_distrib/R)
  productive_labor <- sum((1-vat_tax)*profits_grid_EE$revenue_expected*beta_gross*SS_distrib)
  productive_intermediates <- sum(profits_grid_EE$revenue_expected*gamma_gross*SS_distrib)
  
  # Get total output (need to construct revenue output in data!) 
  total_output <- sum(profits_grid_EE$revenue_output_expected*SS_distrib)
  
  # Get total entry costs 
  mass_exit <- sum(SS_distrib*exit_proba) # which has to equal mass of entrants in SS 
  total_entry_costs <- expected_entry_value*mass_exit
  
  # Can construct total labor demand (which equals total labor supply in baseline equilibrium)
  total_labor_demand <- total_entry_costs + productive_labor
  
  # Get total rent-seeking activities 
  total_rent_seeking <- baseline_parameters[["proba_c"]]*sum(profits_grid_EE$rent_seeking_C_expected*SS_distrib)
  
  # total new investments (in SS)
  total_investments <- delta*productive_capital
  
  # total consumption (enforce aggregate resource constraint -- only true if closed economy or BOP = 0) 
  total_consumption <- total_output - total_investments - productive_intermediates - total_rent_seeking
  
  ## Could add HH budget constraint as consistency check for constructing consumption!! 
  
  # total tax revenues & subsidies 
  cit_revenue = sum(profits_grid_EE$profits_expected*SS_distrib)*(profit_tax/(1-profit_tax))
  vat_revenue = sum(vat_tax*((1-gamma_gross)*profits_grid_EE$revenue_expected - baseline_parameters[["proba_c"]]*profits_grid_EE$rent_seeking_C_expected)*SS_distrib)
  total_tax_revenues <- cit_revenue + vat_revenue
  total_subsidies <- sum(SS_distrib*profits_grid_EE$subsidies_expected)
  total_govt_budget <- total_tax_revenues - total_subsidies
  
  # Also compute total paid fixed costs 
  total_fixed_costs <- sum(SS_distrib*(1-exit_proba)*optimal_param_exit_process_results$results_VF$exp_fcost[,1])
  
  return(list(
    "productive_capital" = productive_capital,
    "productive_labor" = productive_labor,
    "productive_intermediates" = productive_intermediates,
    "total_output" = total_output,
    "total_entry_costs" = total_entry_costs,
    "total_labor_demand" = total_labor_demand,
    "total_rent_seeking" = total_rent_seeking,
    "total_investments" = total_investments,
    "total_consumption" = total_consumption,
    "total_tax_revenues" = total_tax_revenues,
    "total_subsidies" = total_subsidies, 
    "total_govt_budget" = total_govt_budget,
    "total_fixed_costs" = total_fixed_costs))
}

results_baseline_aggregates <- compute_baseline_aggregates(SS_distrib = baseline_SS_distribution, 
                                                           exit_proba = optimal_param_exit_process_results$results_VF$exit_proba[,1])

# About 0.16% of total resources on rent-seeking 
results_baseline_aggregates$total_rent_seeking/results_baseline_aggregates$total_output

# about 36.5% of output is consumed by HHs 
results_baseline_aggregates$total_consumption/results_baseline_aggregates$total_output

# taxes & subsidies: about 35% of corporate tax revenues are spend on subsidizing connected firms 
results_baseline_aggregates$total_subsidies/results_baseline_aggregates$total_tax_revenues

# Or alternatively: Tax revenue from corporate taxation could be 54% higher if (implicit) subsidies to connected firms were shut down
results_baseline_aggregates$total_tax_revenues/(results_baseline_aggregates$total_tax_revenues - results_baseline_aggregates$total_subsidies)

# Fixed costs: Net negative!! 
results_baseline_aggregates$total_fixed_costs/results_baseline_aggregates$total_output

# export 
write_csv(x = as_tibble(results_baseline_aggregates), file = "Computation/data/baseline_aggregates.csv")


#################################
###### Validation of model ###### 
#################################

#### Data preparation #### 

# get main data 
model_results_df <- results_optimal_DRS_eps_cost$results_df %>% 
  mutate(
    intermediate_share = (optimal_m_R + gamma_gross*optimal_revenue)/optimal_revenue, 
    diff_intermediate_share = optimal_m_R/optimal_revenue
  ) 
mean(model_results_df$diff_intermediate_share) # 4pp

# also get DRS data 
model_DRS_results_df <- results_optimal_DRS_eps$results_df %>% 
  mutate(
    intermediate_share = (optimal_m_R + gamma_gross*optimal_revenue)/optimal_revenue, 
    diff_intermediate_share = optimal_m_R/optimal_revenue
  ) 
mean(model_DRS_results_df$diff_intermediate_share) # 4.65pp 

#### Step 1 (partly in Appendix): Distribution of rent-seeking and subsidies in model #### 

## Distribution plot: Rent-seeking (left)
plot_m_R_distrib <- model_results_df %>% 
  ggplot() + geom_point(aes(x = log(TFPQ_model), y = optimal_m_R/optimal_revenue), alpha = 0.1, size = 0.1) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "Rent-seeking cost share") + 
  xlab(label = "TFPQ (log)") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10")

## Distribution plot: Subsidies (left)
plot_subsidy_distrib <- model_results_df %>% 
  ggplot() + geom_point(aes(x = log(TFPQ_model), y = optimal_subsidy), alpha = 0.1, size = 0.1) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "Subsidy rate") + 
  xlab(label = "TFPQ (log)") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10")

ggarrange(
  plot_m_R_distrib, #  + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))) 
  plot_subsidy_distrib, #+ theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  
# ggsave(width = 12, height = 5, dpi = 200,
#        "Paper writing/Revision JPE Macro/Final paper/Publication Figures/FigureA_baseline_m_R_and_subsidy_variation.png") 

#### Step 2 Indirect evidence: differential intermediate input shares #### 

### Level differences ###

# Unconditional median & mean: Connected firms have 7-11 pp. higher intermediate expenditure share 
firms_1997 %>% group_by(connected) %>% 
  summarise(med_intermediate_share = median(intermediate_share),
            avg_intermediate_share = mean(intermediate_share))

## Lets look at same moment after residualizing intermediate share 

# median & mean (after residualizing): Connected firms have 5-8 pp. higher intermediate expenditure share 
firms_1997 %>% group_by(connected) %>% 
  summarise(med_intermediate_share = median(intermediate_share_resid),
            avg_intermediate_share = mean(intermediate_share_resid))

## Regression results (as reported in paper) 

# Raw results (no additional fixed effects): 7.3pp 
summary(fixest::feols(fml = intermediate_share ~ connected, 
                      data = firms_1997 %>% mutate(connected = (connected == "connected"))))

# Result in paper: Further add FEs & cluster SE at industry-level: 5.7 pp
summary(fixest::feols(fml = intermediate_share ~ connected | industry_4digit + estyear_FE + prov + state_owned_major + state_owned_partly, 
                      data = firms_1997 %>% mutate(connected = (connected == "connected"))))
# with CI from 2.1 - 9.2pp 

## Compare to model: 4.0pp (baseline) vs. 4.65pp of DRS model (both models are very similar)
mean(model_results_df$diff_intermediate_share)
mean(model_DRS_results_df$diff_intermediate_share)


### Variance differences ###

### Compute differential variance in intermediate share 

# What about dispersion in intermediate input shares? Connected indeed slightly higher!
diff_var_interm_share <- firms_1997 %>% group_by(connected) %>% 
  summarise(var_intermediate_share = var(intermediate_share_resid))
diff_var_interm_share$var_intermediate_share[1] - diff_var_interm_share$var_intermediate_share[2]

## Bootstrap 95% confidence band for differential variance of intermediate share 
compute_bs_var_interm_share <- function(data_connect,data_nonconnect, bs_draws = 10000){
  
  ## Draw with replacement from intermediate input share distribution
  
  n_draws_C <- nrow(data_connect)
  n_draws_NC <- nrow(data_nonconnect)
  
  # set seeds
  set.seed(123456)
  bootstrap_seeds = rnorm(n = bs_draws, mean = 1000, sd = 100)
  
  # Get sampling function 
  take_sample <- Vectorize(function(data,seed,n_draws){
    set.seed(seed)
    return(sample(x = data, size = n_draws, replace = TRUE))
  }, vectorize.args = c("seed"))
  
  # bootstrap samples
  bs_share_C <- take_sample(
    data = data_connect$intermediate_share_resid, 
    seed = bootstrap_seeds, n_draws = n_draws_C)
  
  bs_share_NC <- take_sample(
    data = data_nonconnect$intermediate_share_resid, 
    seed = bootstrap_seeds, n_draws = n_draws_NC)
  
  ## Compute variances 
  
  variances_C <- apply(bs_share_C, 2, var)
  variances_NC <- apply(bs_share_NC, 2, var)
  
  # compute differential variances 
  diff_variances <- variances_C - variances_NC
  
  # return results
  return(diff_variances)
}

diff_var_interm_share_bs <- compute_bs_var_interm_share(
  data_connect = firms_1997 %>% filter(connected == "connected"),
  data_nonconnect = firms_1997 %>% filter(connected == "non-connected"),
  bs_draws = 10000)

# get 95% CI: ranges from -0.003 to 0.015
quantile(x = diff_var_interm_share_bs, probs = c(0.025,0.975))

# Model: Variance in intermediate share very similar between baseline & DRS model 
var(model_results_df$intermediate_share)
var(model_DRS_results_df$intermediate_share)

#### Step 2 Indirect evidence (Appendix 1): Composition of differential interm input shares #### 

# Check subcomponents of intermediates, focus on "other expenditures" 

## Baseline difference in intermediates share 

# baseline difference: 5.7pp 
lm_baseline_diff_intermediate_share <- fixest::feols(
  fml = intermediate_share ~ connected | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE,
  data = firms_1997)
summary(lm_baseline_diff_intermediate_share)

# small and non-significant 
lm_indirect_tax_diff <- fixest::feols(
  fml = indirect_tax_share ~ connected | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE,
  data = firms_1997 %>% mutate(indirect_tax_share = indirect_taxes/routs))
summary(lm_indirect_tax_diff)

# other expenditures (overall): non-connected firms have lower "other expenditures"
lm_other_exp_diff <- fixest::feols(
  fml = other_exp_share ~ connected | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE,
  data = firms_1997 %>% mutate(other_exp_share = other_expenses_total/routs))
summary(lm_other_exp_diff) # but note that this only explains about 8% of model-implied difference in intermediates

# check sub-components: bigger charity for non-connected 
lm_charity_diff <- fixest::feols(
  fml = charity_share ~ connected | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE,
  data = firms_1997 %>% mutate(charity_share = other_expenses_charity/routs))
summary(lm_charity_diff)

# lower royalty share for non-connected 
lm_royalty_diff <- fixest::feols(
  fml = royalty_share ~ connected | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE,
  data = firms_1997 %>% mutate(royalty_share = other_expenses_royalty/routs))
summary(lm_royalty_diff)

# advertising unclear 
lm_advertising_diff <- fixest::feols(
  fml = advertising_share ~ connected | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE,
  data = firms_1997 %>% mutate(advertising_share = other_expenses_advertising/routs))
summary(lm_advertising_diff)

# environment unclear 
lm_environment_diff <- fixest::feols(
    fml = environment_share ~ connected | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE, 
    data = firms_1997 %>% mutate(environment_share = other_expenses_environment/routs))
summary(lm_environment_diff)

# repr allowance unclear 
lm_repr_allowance_diff <- fixest::feols(
    fml = repr_allowance_share ~ connected | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE, 
    data = firms_1997 %>% mutate(repr_allowance_share = other_expenses_repr_allowance/routs))
summary(lm_repr_allowance_diff)

# less management fees for non-connected  
lm_mngmt_fee_diff <- fixest::feols(
    fml = mgmt_fee_share ~ connected | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE, 
    data = firms_1997 %>% mutate(mgmt_fee_share = other_expenses_mngmt_fee/routs))
summary(lm_mngmt_fee_diff)

# travel unclear/slightly more for non-connected
lm_travel_diff <- fixest::feols(
    fml = travel_share ~ connected | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE, 
    data = firms_1997 %>% mutate(travel_share = other_expenses_travel/routs))
summary(lm_travel_diff)

# RandD unclear (in line with connected firms not being more productive)
lm_RandD_diff <- fixest::feols(
    fml = RandD_share ~ connected | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE, 
    data = firms_1997 %>% mutate(RandD_share = other_expenses_RandD/routs))
summary(lm_RandD_diff)

# HR unclear/zero 
lm_HR_diff <- fixest::feols(
    fml = HR_share ~ connected | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE, 
    data = firms_1997 %>% mutate(HR_share = other_expenses_HR/routs))
summary(lm_HR_diff)

# less other expenses for non-connected firms 
lm_other_diff <- fixest::feols(
    fml = other_share ~ connected | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE, 
    data = firms_1997 %>% mutate(other_share = other_expenses_other/routs))
summary(lm_other_diff)

## Save in regression table (Latex)
fixest::etable(
  lm_baseline_diff_intermediate_share, 
  lm_other_exp_diff, 
  lm_advertising_diff,
  lm_HR_diff,
  lm_mngmt_fee_diff,
  lm_RandD_diff,
  lm_repr_allowance_diff,
  lm_royalty_diff,
  lm_other_diff, 
  tex = TRUE)


#### Step 2 Indirect evidence (Appendix 1): Distribution of R&D expenditure #### 

# First residualize RandD expenditure share in the data 

# create share
firms_1997 <- firms_1997 %>% mutate(RandD_share = other_expenses_RandD/routs)

# residualize R&D distribution
OLS_resid_RandD <- fixest::feols(
  fml = RandD_share ~ 1 | prov + state_owned_major + state_owned_partly + estyear_FE + industry_4digit, #
  data = firms_1997)

# get residualized RandD share
firms_1997$RandD_share_resid <- firms_1997$RandD_share - predict(object = OLS_resid_RandD, newdata = firms_1997) + mean(firms_1997$RandD_share)

# Then look at entire relative distribution in the data 

# R&D distribution 
plot_RandD_distrib <- firms_1997 %>% 
  mutate(
    connections = fct_rev(as.factor(connected)), 
    connected = case_when(
      connected == "connected" ~ "C", 
      connected == "non-connected" ~ "NC")) %>% 
  ggplot(aes(y = connected, group = connected, fill = connected)) + 
  geom_density_ridges(
    aes(x = RandD_share_resid,
        fill = connections),
    alpha = 0.8, color = "black", quantile_lines = TRUE, quantiles = 2, 
    show.legend = TRUE, bandwidth = 0.0001) + 
  labs(
    x = "R&D share",
    y = ""
  ) + 
  xlim(c(-0.001,0.005)) + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_fill_discrete(name = "Type:") + 
  theme(legend.position = "bottom")

# # output results 
# ggsave(width = 6, height = 4, "Paper writing/Revision JPE Macro/paper revision round 2/Figures/relative_RandD_distribution.png")


#### Step 2 Indirect evidence (Appendix 2): Differences across blood vs. normal connected firms #### 

# Blood connected firms are larger than normal connected? In line with comparative statics 
firms_1997 %>% group_by(connections_type) %>% 
  summarise(mean_TFPQ = mean(log_TFPQ_resid), 
            median_TFPQ = median(log_TFPQ_resid))

# In regression form: Normal connected are smaller, but not significant
lm_blood_diff_size <- fixest::feols(
  fml = log_TFPQ_gross ~ connections_type | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE,
  data = firms_1997)
summary(lm_blood_diff_size)

## Model predicts that conditional on TFPQ: higher epsilon means higher rent-seeking
summary(lm(
  formula = diff_intermediate_share ~ epsilon, 
  # restrict data to same TFPQ around median 
  data = model_results_df %>% filter(TFPQ_model > 34.95 & TFPQ_model < 35.05)))

# not controlling for TFPQ, effect is still positive 
summary(lm(
  formula = diff_intermediate_share ~ epsilon, 
  # restrict data to same TFPQ around median 
  data = model_results_df))

# Linearly controlling for TFPQ, effect is bigger and positive 
summary(lm(
  formula = diff_intermediate_share ~ epsilon + log(TFPQ_model), 
  # restrict data to same TFPQ around median 
  data = model_results_df))

### In Data: 

# Blood connected firms dont have larger intermediate shares conditional on TFPQ 
lm_blood_diff_interm_share <- fixest::feols(
  fml = intermediate_share ~ connections_type + log_TFPQ_gross | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE,
  data = firms_1997 %>% filter(connections_type %in% c("Blood connected", "Normal connected")))
summary(lm_blood_diff_interm_share)
# Though not significant! 

# significant when taking full sample 
lm_blood_diff_interm_share_full <- fixest::feols(
  fml = intermediate_share ~ connections_type + log_TFPQ_gross | industry_4digit + prov + state_owned_major + state_owned_partly + estyear_FE,
  data = firms_1997)
summary(lm_blood_diff_interm_share_full)

### Export results as regression table (Latex)
fixest::etable(
  lm_blood_diff_size, 
  lm_blood_diff_interm_share,
  lm_blood_diff_interm_share_full,
  tex = TRUE)


#### Step 3: Direct evidence from Odebrecht case #### 

## Compare to Odebrecht case: Look at top 25% connected firms (in terms of size/TFPQ)

# Full distribution (share goes to zero for largest firms) 
model_results_df %>% 
  ggplot() + geom_point(aes(x = TFPQ_model, y = diff_intermediate_share))

# Look at top 25%: Avg & median are 1.56-1.57% (Reported: Avg of 0.016)
summary((model_results_df %>% 
  # order by TFPQ
  arrange(TFPQ_model) %>%  
  # drop cutoff*% of observations
  dplyr::slice((round(0.75*n()) + 1):n()))$diff_intermediate_share)

# DRS model: Look at top 25%: Avg & median is 5.1-5.3% (Reported: Avg of 0.051)
summary((model_DRS_results_df %>% 
           # order by TFPQ
           arrange(TFPQ_model) %>%  
           # drop cutoff*% of observations
           dplyr::slice((round(0.75*n()) + 1):n()))$diff_intermediate_share)

### Profit share Odebrecht case vs model ### 

# Overall: Mean is 43.7%  (53.2% in DRS model)
summary(model_results_df$optimal_m_R/model_results_df$optimal_profits_C)
summary(model_DRS_results_df$optimal_m_R/model_DRS_results_df$optimal_profits_C)

# Restrict to top 25%: 13.8% 
model_results_df %>% 
  # order by TFPQ
  arrange(TFPQ_model) %>%  
  # drop cutoff*% of observations
  dplyr::slice((round(0.75*n()) + 1):n()) %>% 
  # look at avg profit share 
  summarise(avg_profit_share = mean(optimal_m_R/optimal_profits_C))

# DRS model: Restrict to top 25%: 62.9% (far off!)
model_DRS_results_df %>% 
  # order by TFPQ
  arrange(TFPQ_model) %>%  
  # drop cutoff*% of observations
  dplyr::slice((round(0.75*n()) + 1):n()) %>% 
  # look at avg profit share 
  summarise(avg_profit_share = mean(optimal_m_R/optimal_profits_C))

### Subsidy estimate: Total costs after renegotiation / initial cost (assume competitive price) 

# Overall: (taken from Total (2001-2016) in Table 2)
134051/106384 # about 26% price markup 

# average across projects (from Table 2: Indiv ratios): 42% 
mean(c(13343/4141, 2134/1828, 5853/4588, 4074/3466, 384/384, 3059/2155, 10391/8839, 17253/14904, 77559/66080))

# in model: 42% (super close!) 
(model_results_df %>% 
  # order by TFPQ
  arrange(TFPQ_model) %>%  
  # drop cutoff*% of observations
  dplyr::slice((round(0.75*n()) + 1):n()) %>% 
  # look at avg profit share 
  summarise(avg_subsidy_rate = mean(1 + optimal_subsidy)))[[1]]

# in DRS model: 53% (less close!) 
(model_DRS_results_df %>% 
  # order by TFPQ
  arrange(TFPQ_model) %>%  
  # drop cutoff*% of observations
  dplyr::slice((round(0.75*n()) + 1):n()) %>% 
  # look at avg profit share 
  summarise(avg_subsidy_rate = mean(1 + optimal_subsidy)))[[1]]

## Full distribution of subsidies: Avg subsidy rate of 44%  
model_results_df %>% 
  # look at avg profit share 
  summarise(avg_subsidy_rate = mean(1 + optimal_subsidy))

##########################################
###### Estimation of model with DRS ###### 
##########################################

################ DRS results Productivity Process ################

#### Model & solve for optimal parameters ####

# save parameters 
DRS_parameters <- c(
  optimal_DRS_eps$par*initial_parameter_DRS_eps,
  "fixed_cost" = 0.0, 
  "proba_c" = nrow_connected/(nrow_connected+nrow_non_connected), 
  "rho" = ratio_beta_sigma_DRS/sqrt((ratio_beta_sigma_DRS)^2 + 1)
  )

# Write objective function for estimating productivity process 
objective_productivity_process_DRS <- function(
    productivity_param,
    parameters = DRS_parameters){
  
  ##### Step 0: Prepare data given parameters (can move this out!) #####
  
  print(productivity_param)
  
  # extract parameters
  rho_z <- productivity_param["rho_z"]
  mean_zeta_star <- productivity_param["mean_zeta_star"]
  var_zeta_star <- productivity_param["var_zeta_star"]
  
  ##### Step 1: Update marginal productivity distribution of survivors given productivity_param #####
  
  # draw new log_z_star (could also have many more rnorm draws!) 
  set.seed(12345)
  log_z_star_new <- rho_z*log_z_star_NC_survivors + rnorm(n = length(log_z_star_NC_survivors), mean = mean_zeta_star, sd = sqrt(var_zeta_star))
  
  ##### Step 2: Draw new epsilon based on new z_star #####
  
  # Draw new epsilon based on new z_star 
  set.seed(12345)
  new_epsilon <- rnorm(n = length(log_z_star_NC_survivors), 
                       mean = parameters["alpha_eps"] + parameters["beta_eps"]*log_z_star_new, 
                       sd = sqrt(parameters["variance_eps_z"])
  )
  
  # Check that epsilon are positive 
  if(sum(new_epsilon < 0) > 0){
    print(paste("Warning: There are ", sum(new_epsilon < 0), " negative epsilon with min ", round(min(new_epsilon),4), ". Setting all negative ones to 1e-16."))
  }
  
  # ensure that epsilon are positive 
  new_epsilon[new_epsilon < 0] <- 1e-16 
  
  ##### Step 3: Solve for optimal policies/subsidies ##### 
  
  results_df <- compute_subsidy_DRS_eps_model_grid(
    z_star_grid = exp(log_z_star_new), 
    epsilon_grid = new_epsilon,
    z_tilde_grid = z_tilde_baseline(z_star = exp(log_z_star_new)), 
    theta = DRS_parameters[["theta"]])
  
  ##### Step 4: Construct objective ##### 
  
  # create regression dataset
  reg_df <- rbind(
    # first dataset only non-connected 
    results_df %>% select(z_star) %>% 
      mutate(weight = 1 - unname(parameters["proba_c"]),
             TFPQ_prev = exp(log_z_star_NC_survivors)) %>%  
      rename(TFPQ = z_star), 
    # second dataset only connected 
    results_df %>% select(TFPQ_model) %>% 
      mutate(weight = unname(parameters["proba_c"]), 
             TFPQ_prev = exp(log_z_star_NC_survivors)) %>%  
      rename(TFPQ = TFPQ_model)
  )
  
  # Now run regression 
  main_regression <- lm(formula = log(TFPQ) ~ log(TFPQ_prev), data = reg_df, weights = reg_df$weight)
  
  # construct objective 
  error_constant <- abs(main_regression$coefficients[[1]] - lm_prod_process$coefficients[[1]])/lm_prod_process$coefficients[[1]]
  error_rho <- abs(main_regression$coefficients[[2]] - lm_prod_process$coefficients[[2]])/lm_prod_process$coefficients[[2]]
  error_var <- abs(matrixStats::weightedVar(x = main_regression$residuals, w = reg_df$weight) - var(lm_prod_process$residuals))/var(lm_prod_process$residuals)
  
  # print(main_regression$coefficients[[1]])
  # print(main_regression$coefficients[[2]])
  # print(var(main_regression$residuals))
  
  # full objective is sum of errors 
  objective <- sum(error_constant^2 + error_rho^2 + error_var^2)
  
  # return 
  return(list(objective = objective, 
              moment1 = main_regression$coefficients[[1]], 
              moment2 = main_regression$coefficients[[2]], 
              moment3 = matrixStats::weightedVar(x = main_regression$residuals, w = reg_df$weight)))
}

DRS_initial_param_guess_prod_process <- c(
  "rho_z" = lm_prod_process$coefficients[[2]], 
  "mean_zeta_star" = lm_prod_process$coefficients[[1]], 
  "var_zeta_star" = var(lm_prod_process$residuals)
) 

# test 
objective_productivity_process_DRS(productivity_param = DRS_initial_param_guess_prod_process)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
DRS_optimal_param_prod_process <- optimParallel( 
  par = DRS_initial_param_guess_prod_process, 
  fn = function(par){
    result <- objective_productivity_process_DRS(productivity_param = par)
    print(paste("Objective is: ", result$objective))
    return(result$objective)
  },
  method = "L-BFGS-B",
  lower = c(0.9,0.06,0.01), 
  upper = c(1.0,0.15,0.03)
)

stopCluster(cl) 

# converged! 
DRS_optimal_param_prod_process$par
DRS_optimal_param_prod_process$value # obj < 1e-9
DRS_optimal_param_prod_process$message # converged 

################ Baseline results Exit/Entry processes ################

#### Step 1: Discretize z and get transition matrix over z ####

# from Tauchen -- could also use Rouwenhorst!
library(Rtauchen)

# get transition matrix 
DRS_transition_z <- Rtauchen(
  ne = 1000, 
  ssigma_eps = sqrt(DRS_optimal_param_prod_process$par[[3]]), 
  llambda_eps = DRS_optimal_param_prod_process$par[[1]], 
  m = 2.5)
write_csv(x = as_tibble(DRS_transition_z), file = "Computation/data/DRS_transition_matrix_z.csv")

# get corresponding grid (and make sure mean is correct!) 
DRS_mean_uncond_log_z_star <- (DRS_optimal_param_prod_process$par[[2]]/(1-DRS_optimal_param_prod_process$par[[1]]))
DRS_log_z_star_grid_EE <- as.vector(Tgrid(
  ne = 1000, 
  ssigma_eps = sqrt(DRS_optimal_param_prod_process$par[[3]]), 
  llambda_eps = DRS_optimal_param_prod_process$par[[1]], 
  m = 2.5) + DRS_mean_uncond_log_z_star)

DRS_z_star_grid_EE <- exp(DRS_log_z_star_grid_EE)
write_csv(x = tibble(z_star = DRS_z_star_grid_EE), file = "Computation/data/DRS_z_star_grid.csv")

# test that this gives correct numbers: looks good! 

DRS_test_prod_process <- tibble(log_z_star = DRS_log_z_star_grid_EE,
                            log_z_star_next = NA)
for(row in c(1:nrow(DRS_test_prod_process))){
  DRS_test_prod_process$log_z_star_next[row] <- sample(x = DRS_log_z_star_grid_EE, size = 1, replace = T, prob = DRS_transition_z[row,])
}

DRS_test_lm_prod_process <- lm(data = DRS_test_prod_process, formula = log_z_star_next ~ log_z_star)
summary(DRS_test_lm_prod_process) # coefficients in line with mean & persistence 
sd(DRS_test_lm_prod_process$residuals) # sd of residual also in line
rm(DRS_test_lm_prod_process)
rm(DRS_test_prod_process)

#### Step 2: For each z, solve for optimal profits including for grid over epsilon ####

# create array to save (expected) profits conditional on z (both for NC & C)
DRS_profits_grid_EE <- tibble(
  z_star = DRS_z_star_grid_EE,
  z_tilde = z_tilde_baseline(z_star = DRS_z_star_grid_EE), 
  profits_C_expected = rep(NA, length(DRS_z_star_grid_EE)),
  profits_NC = rep(NA, length(DRS_z_star_grid_EE))
)

DRS_get_expected_profits_C_EE <- function(row, n_eps){ 
  
  ### Substep 1: Draw epsilon conditional on z_star & the probability 
  set.seed(12345)
  epsilon <- rnorm(
    n = n_eps, 
    mean = DRS_parameters[["alpha_eps"]] + DRS_parameters[["beta_eps"]]*DRS_log_z_star_grid_EE[row], 
    sd = sqrt(DRS_parameters[["variance_eps_z"]])
  )
  
  # ensure that epsilon is always positive
  epsilon[epsilon < 0] <- 1e-16 
  
  # get probability/density 
  proba_epsilon <- dnorm(x = epsilon, 
                         mean = DRS_parameters[["alpha_eps"]] + DRS_parameters[["beta_eps"]]*DRS_log_z_star_grid_EE[row], 
                         sd = sqrt(DRS_parameters[["variance_eps_z"]]))
  
  # normalize probability to make sure it sums to 1  
  proba_epsilon <- proba_epsilon/sum(proba_epsilon)
  
  ### Substep 2: Get optimal profits 
  
  results_df <- compute_subsidy_DRS_eps_model_grid(
    z_star_grid = rep(DRS_z_star_grid_EE[row], n_eps), 
    epsilon_grid = epsilon, 
    z_tilde_grid = rep(DRS_profits_grid_EE$z_tilde[row], n_eps),
    theta = DRS_parameters[["theta"]])
  
  # get expected profits by taking weighted mean across all possible epsilon 
  expected_profits_C <- sum(results_df$optimal_profits_C*proba_epsilon)
  expected_profits_NC <- sum(results_df$optimal_profits_NC*proba_epsilon)
  
  # collect further objects needed for later 
  expected_m_R_C <- sum(results_df$optimal_m_R*proba_epsilon)
  expected_subsidies_C <- sum((results_df$optimal_revenue)*(results_df$optimal_subsidy/(1 + results_df$optimal_subsidy))*proba_epsilon)
  # add VAT revenue here 
  # add profit tax revenue here 
  expected_revenue_output_C <- sum(results_df$revenue_output_C*proba_epsilon)
  expected_revenue_output_NC <- sum(results_df$revenue_output_NC*proba_epsilon)
  expected_subsidy_rate_C <- sum(results_df$optimal_subsidy*proba_epsilon)
  expected_revenue_C <- sum(results_df$optimal_revenue*proba_epsilon)
  expected_revenue_NC <- sum(results_df$z_tilde*proba_epsilon)
  
  return(c("expected_profits_C" = expected_profits_C, 
           "expected_profits_NC" = expected_profits_NC, 
           "expected_m_R_C"= expected_m_R_C,
           "expected_subsidies_C" = expected_subsidies_C,
           "expected_revenue_output_C" = expected_revenue_output_C, 
           "expected_revenue_output_NC" = expected_revenue_output_NC, 
           "expected_subsidy_rate_C" = expected_subsidy_rate_C,
           "expected_revenue_C" = expected_revenue_C, 
           "expected_revenue_NC" = expected_revenue_NC))
}

# compute profits (takes a moment!) & other optimal choices 
DRS_profits_matrix_EE <- sapply(X = c(1:length(DRS_z_star_grid_EE)), FUN = function(x){DRS_get_expected_profits_C_EE(row = x, n_eps = 100)})
DRS_profits_grid_EE$profits_C_expected <- DRS_profits_matrix_EE[1,]
DRS_profits_grid_EE$profits_NC <- DRS_profits_matrix_EE[2,]
DRS_profits_grid_EE$profits_expected <- DRS_parameters[["proba_c"]]*DRS_profits_grid_EE$profits_C_expected + (1-DRS_parameters[["proba_c"]])*DRS_profits_grid_EE$profits_NC
DRS_profits_grid_EE$rent_seeking_C_expected <- DRS_profits_matrix_EE[3,]
DRS_profits_grid_EE$subsidies_expected <- DRS_parameters[["proba_c"]]*DRS_profits_matrix_EE[4,]
DRS_profits_grid_EE$revenue_output_expected <- DRS_parameters[["proba_c"]]*DRS_profits_matrix_EE[5,] + (1-DRS_parameters[["proba_c"]])*DRS_profits_matrix_EE[6,]
DRS_profits_grid_EE$revenue_C_expected <- DRS_profits_matrix_EE[8,]
DRS_profits_grid_EE$revenue_NC <- DRS_profits_matrix_EE[9,]
DRS_profits_grid_EE$revenue_expected <- DRS_parameters[["proba_c"]]*DRS_profits_grid_EE$revenue_C_expected + (1-DRS_parameters[["proba_c"]])*DRS_profits_grid_EE$revenue_NC
DRS_profits_grid_EE$subsidy_rate_C_expected <- DRS_profits_matrix_EE[7,]

# can export this 
write_csv(x = DRS_profits_grid_EE, file = "Computation/data/DRS_profits_grid_baseline.csv")

#### Step 3: VFI: Solve expected VF over z conditional on guess for Gumbel parameters ####

# VFI function 
DRS_VFI_EE <- function(guess_exp_VF,param_EE,crit = 1e-6,verbose=T){
  
  ### Step 0: Prepare objects 
  
  # get parameters 
  level_fcost <- param_EE[["level_fcost"]]
  scale_fcost <- param_EE[["scale_fcost"]]
  
  # initialize objects 
  exp_VF_new <- guess_exp_VF
  exp_profits <- DRS_profits_grid_EE$profits_expected
  
  # create function to compute expected fixed costs 
  compute_exp_fcost <- function(exit_proba,exp_VF){
    
    # make sure that gamma incomplete is robust regarding "too small" values 
    gamma_incomplete <- case_when(
      # exit_proba too close to 1 
      -log(exit_proba) < 1e-250 ~ expint::gammainc(a = 0,x = 1e-250),
      # exit_proba too close to 0 
      exit_proba < 1e-250 ~ 0.0, 
      .default = expint::gammainc(a = 0,x = -log(exit_proba))
    )
    
    # compute expected fixed cost 
    exp_fcost <- beta*exp_VF*exit_proba - scale_fcost*gamma_incomplete
    
    return(exp_fcost)
  }
  
  ### Step 1: While loop
  iteration <- 1 
  VF_diff <- Inf 
  
  while (VF_diff > crit) {
    
    # print progress
    if(verbose){
      print(paste("Iteration",iteration,"with difference:",VF_diff))
    }
    
    # get exit probability given new guess 
    exit_proba <- 1 - exp(-exp(-(beta*exp_VF_new - level_fcost)/scale_fcost))
    exp_fcost <- compute_exp_fcost(exit_proba = exit_proba, exp_VF = exp_VF_new)
    
    # compute new VF (over all z)
    VF_new <- exp_profits + (1-exit_proba)*( -exp_fcost + beta*exp_VF_new )
    
    # save old results 
    exp_VF_old <- exp_VF_new
    
    # compute new expected VF 
    exp_VF_new <- DRS_transition_z %*% VF_new
    
    # update iteration & compute difference 
    iteration <- iteration + 1 
    VF_diff <- max(abs(exp_VF_old - exp_VF_new))
    
  }
  
  # update results
  exit_proba <- 1 - exp(-exp(-(beta*exp_VF_new - level_fcost)/scale_fcost))
  exp_fcost <- compute_exp_fcost(exit_proba = exit_proba, exp_VF = exp_VF_new)
  VF <- exp_profits + (1-exit_proba)*( -exp_fcost + beta*exp_VF_new )
  
  # Return results 
  return(list(exp_VF = exp_VF_new, exit_proba = exit_proba, VF = VF, exp_fcost = exp_fcost))
}

# Test it 
DRS_test_VFI_EE <- DRS_VFI_EE(
  guess_exp_VF = (1/(1-beta))*DRS_profits_grid_EE$profits_NC, 
  param_EE = c("level_fcost" = 2500000, "scale_fcost" = 10000))

summary(DRS_test_VFI_EE$exit_proba)

#### Step 4: Construct objective function ####

## Find distribution of NC in 1997 over z_grid 
DRS_closest_grid_points_z_star_EE <- sapply(X = z_star_NC_baseline, FUN = function(x){which.min(abs(x - DRS_z_star_grid_EE))})
DRS_empirical_distrib_z_star_NC_EE <- tibble(z_star_grid_point = DRS_closest_grid_points_z_star_EE) %>% 
  group_by(z_star_grid_point) %>% 
  summarise(share = n()/length(DRS_closest_grid_points_z_star_EE))
DRS_empirical_distrib_z_star_NC_EE <- left_join(
  x = tibble(z_star_grid_point = c(1:length(DRS_z_star_grid_EE)), 
             z_star = DRS_z_star_grid_EE), 
  y = DRS_empirical_distrib_z_star_NC_EE) 
DRS_empirical_distrib_z_star_NC_EE$share <- ifelse(is.na(DRS_empirical_distrib_z_star_NC_EE$share), 0.0, DRS_empirical_distrib_z_star_NC_EE$share)

## Next: Construct objective 
DRS_objective_exit_process <- function(param_EE){
  
  print(param_EE)
  
  # Step 1: Solve for VF given param_EE 
  results_VF <- DRS_VFI_EE(
    guess_exp_VF = (1/(1-beta))*DRS_profits_grid_EE$profits_NC, 
    param_EE = param_EE, verbose = F) 
  
  # Step 2: Run regression of exit proba on log(z_star)
  model_data <- DRS_empirical_distrib_z_star_NC_EE 
  model_data$exit_proba <- results_VF$exit_proba[,1]
  reg_model_firms_NC_exit <- lm(
    data = model_data,
    formula = exit_proba ~ log(z_star), 
    weights = share)
  
  # Step 3: Construct objective 
  moment_coef1 <- (reg_model_firms_NC_exit$coefficients[[1]] - reg_firms_NC_exit_resid$coefficients[[1]])/reg_firms_NC_exit_resid$coefficients[[1]]
  moment_coef2 <- (reg_model_firms_NC_exit$coefficients[[2]] - reg_firms_NC_exit_resid$coefficients[[2]])/reg_firms_NC_exit_resid$coefficients[[2]]
  
  print(paste("Constant is:",reg_model_firms_NC_exit$coefficients[[1]], 
              ". Slope is:", reg_model_firms_NC_exit$coefficients[[2]]))
  
  objective <- moment_coef1^2 + moment_coef2^2 
  return(list(objective = objective, results_VF = results_VF))
}

#### Step 5: Find optimal exit parameters ####

# start with good initial guess 
DRS_initial_param_guess_exit_process <- c("level_fcost" = -52300000*1.5, "scale_fcost" = 2.5e7*1.5)
DRS_objective_exit_process(param_EE = DRS_initial_param_guess_exit_process)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
DRS_optimal_param_exit_process <- optimParallel( 
  par = DRS_initial_param_guess_exit_process/DRS_initial_param_guess_exit_process, 
  fn = function(par){
    parameter <- par*DRS_initial_param_guess_exit_process
    results <- DRS_objective_exit_process(param_EE = parameter)
    print(paste("Objective is: ", results$objective))
    return(results$objective)
  },
  method = "L-BFGS-B",
  lower = c(0.5,0.5), 
  upper = c(1.2,1.2)
)

stopCluster(cl) 

DRS_optimal_param_exit_process$par*DRS_initial_param_guess_exit_process # interior solution
DRS_optimal_param_exit_process$value # close to zero
DRS_optimal_param_exit_process$message # converged! 

# solve these 
DRS_optimal_param_exit_process_results <- DRS_objective_exit_process(param_EE = DRS_optimal_param_exit_process$par*DRS_initial_param_guess_exit_process)

# look at interquartile range of exit probabilities among producing NC (same as baseline!)
DRS_optimal_param_exit_process_results$results_VF$exit_proba[which.min(abs(quantile(z_star_NC_baseline, probs=0.25) - DRS_z_star_grid_EE)),1] # 1st quartile TFPQ NC
DRS_optimal_param_exit_process_results$results_VF$exit_proba[which.min(abs(quantile(z_star_NC_baseline, probs=0.75) - DRS_z_star_grid_EE)),1] # 3rd quartile TFPQ NC

## Add these to main paper
DRS_fixed_cost_param <- tibble(
  level_fcost = (DRS_optimal_param_exit_process$par*DRS_initial_param_guess_exit_process)[[1]],
  scale_fcost = (DRS_optimal_param_exit_process$par*DRS_initial_param_guess_exit_process)[[2]],
)
write_csv(x = DRS_fixed_cost_param, file = "Computation/data/DRS_fixed_cost_param.csv") 

#### Step 6: Find entry cost in line with zero profit condition ####

### to get unconditional expected value function, need unconditional distrib over z*

# draw from unconditional distribution 
DRS_z_star_uncond_distrib_EE <- exp(rnorm(n = 100000, 
                                          mean = DRS_optimal_param_prod_process$par[[2]]/(1-DRS_optimal_param_prod_process$par[[1]]), 
                                          sd = sqrt(DRS_optimal_param_prod_process$par[[3]]/(1-DRS_optimal_param_prod_process$par[[1]]^2))
))

# put this distribution on the grid 
DRS_closest_grid_points_z_star_uncond_distrib_EE <- sapply(X = DRS_z_star_uncond_distrib_EE, FUN = function(x){which.min(abs(x - DRS_z_star_grid_EE))})
DRS_z_star_uncond_distrib_EE_grid <- tibble(z_star_grid_point = DRS_closest_grid_points_z_star_uncond_distrib_EE) %>% 
  group_by(z_star_grid_point) %>% 
  summarise(share = n()/length(DRS_closest_grid_points_z_star_uncond_distrib_EE))
# as long as all points given, then can proceed with this (otherwise, make sure all grid points given)
write_csv(x = DRS_z_star_uncond_distrib_EE_grid, file = "Computation/data/DRS_z_star_uncond_distrib.csv")

# compute expected entry value 
DRS_expected_entry_value <- sum(DRS_optimal_param_exit_process_results$results_VF$VF[,1]*DRS_z_star_uncond_distrib_EE_grid$share)

# How big is this? Slightly smaller than baseline estimate 
DRS_expected_entry_value/mean(firms_1997$routs) # about 12% of avg firm revenue 
DRS_expected_entry_value/9040 # in 000' USD: about 980k USD (fairly large?) 

#### Export all parameters #### 

# save as dataframe (to read in Julia)
DRS_parameters_df <- tibble(
  "alpha_eps" = (optimal_DRS_eps$par*initial_parameter_DRS_eps)[[1]],
  "beta_eps" = (optimal_DRS_eps$par*initial_parameter_DRS_eps)[[2]],
  "variance_eps_z" = (optimal_DRS_eps$par*initial_parameter_DRS_eps)[[3]],
  "cost_level" = 0.0,
  "fixed_cost" = 0.0, 
  "theta" = (optimal_DRS_eps$par*initial_parameter_DRS_eps)[[4]], 
  "cost_elasticity" = 0.0, 
  "proba_c" = nrow_connected/(nrow_connected+nrow_non_connected), 
  "rho" = unname(ratio_beta_sigma_DRS/sqrt((ratio_beta_sigma_DRS)^2 + 1)),
  "level_fcost" = (DRS_optimal_param_exit_process$par*DRS_initial_param_guess_exit_process)[[1]],
  "scale_fcost" = (DRS_optimal_param_exit_process$par*DRS_initial_param_guess_exit_process)[[2]],
  "rho_z" = DRS_optimal_param_prod_process$par[[1]],
  "mean_zeta_star" = DRS_optimal_param_prod_process$par[[2]],
  "var_zeta_star" = DRS_optimal_param_prod_process$par[[3]]
)
write_csv(x = DRS_parameters_df, file = "Computation/data/DRS_parameters.csv")


###########################################
###### Section 5.1 Extension: Wedges ######
###########################################

#### Step 1: measure (residualized) wedges ####

# Overview of residualized labor and capital shares 
summary(firms_1997$labor_share_resid) 
summary(firms_1997$capital_share_resid)

# make sure that residualized input cost shares are reasonable by winsorizing ends!  
firms_1997 <- firms_1997 %>% 
  mutate(
    capital_share_resid = case_when(
      capital_share_resid < quantile(capital_share_resid, probs = 0.1) ~ unname(quantile(capital_share_resid, probs = 0.1)),
      capital_share_resid > quantile(capital_share_resid, probs = 0.9) ~ unname(quantile(capital_share_resid, probs = 0.9)),
      .default = capital_share_resid),
    labor_share_resid = case_when(
      labor_share_resid < quantile(labor_share_resid, probs = 0.1, na.rm = T) ~ unname(quantile(labor_share_resid, probs = 0.1, na.rm = T)),
      labor_share_resid > quantile(labor_share_resid, probs = 0.9, na.rm = T) ~ unname(quantile(labor_share_resid, probs = 0.9, na.rm = T)),
      .default = labor_share_resid)
    )

summary(firms_1997$labor_share_resid) 
summary(firms_1997$capital_share_resid)


## Compute wedges 
firms_1997$capital_wedge = alpha_gross*(1-vat_tax)*(firms_1997$capital_share_resid^(-1)) - 1 
firms_1997$labor_wedge = beta_gross*(1-vat_tax)*(firms_1997$labor_share_resid^(-1)) - 1 

## Look at distribution of labor wedges, also for connected vs non-connected firms 

# interquartile range from 40% subsidy to 30% tax rate 
summary(firms_1997$labor_wedge)

# interquartile range from 
firms_1997 %>% filter(!is.na(labor_wedge)) %>% 
  group_by(connected) %>% 
  summarise(iqr_labor_wedge = quantile(x = labor_wedge, probs = c(0.25,0.75)))

# Find that connected firms are differentially distorted (face higher implicit cost of labor!) 
firms_1997 %>% filter(!is.na(labor_wedge)) %>%
  group_by(connected) %>% 
  summarise(med_labor_wedge = median(labor_wedge),
            avg_labor_wedge = mean(labor_wedge))
# difference is large: about 15 percentage points! 

# in terms of labor share this means median/mean difference of roughly 2.2-3.6 percentage points  
firms_1997 %>% filter(!is.na(labor_share_resid)) %>%
  group_by(connected) %>% 
  summarise(med_labor_share_resid = median(labor_share_resid),
            avg_labor_share_resid = mean(labor_share_resid))

# check 5th and 95th percentiles for connected and non-connected firms
quantile(firms_1997$labor_wedge[firms_1997$connected == "non-connected"], probs = c(0.05,0.95), na.rm = T)
quantile(firms_1997$labor_wedge[firms_1997$connected == "connected"], probs = c(0.05,0.95), na.rm = T)


## Look at distribution of capital wedges, also for connected vs non-connected firms 

# interquartile range from 55% subsidy to 75% tax rate 
firms_1997 %>% filter(!is.na(capital_wedge)) %>% 
  group_by(connected) %>% 
  summarise(iqr_capital_wedge = quantile(x = capital_wedge, probs = c(0.25,0.75)))

# Find that connected firms are differentially distorted (face higher implicit cost of capital!) 
firms_1997 %>% filter(!is.na(capital_wedge)) %>%
  group_by(connected) %>% 
  summarise(med_capital_wedge = median(capital_wedge),
            avg_capital_wedge = mean(capital_wedge))
# difference is even larger: more than 20 percentage points! 

# in terms of capital share this means median/mean difference of roughly 2.6-3.7 percentage points  
firms_1997 %>% filter(!is.na(capital_share_resid)) %>%
  group_by(connected) %>% 
  summarise(med_capital_share_resid = median(capital_share_resid),
            avg_capital_share_resid = mean(capital_share_resid))

# check 5th and 95th percentiles for connected and non-connected firms
quantile(firms_1997$capital_wedge[firms_1997$connected == "non-connected"], probs = c(0.05,0.95), na.rm = T)
quantile(firms_1997$capital_wedge[firms_1997$connected == "connected"], probs = c(0.05,0.95), na.rm = T)

### Dispersion in wedges? 

# Labor wedge is less dispersed? About 10% less dispersed
firms_1997 %>% filter(!is.na(labor_wedge)) %>%
  group_by(connected) %>% 
  summarise(var_labor_wedge = var(labor_wedge))

# capital wedge is about 32% more dispersed
firms_1997 %>% filter(!is.na(capital_wedge)) %>%
  group_by(connected) %>% 
  summarise(var_capital_wedge = var(capital_wedge))

#### Step 2: measure (residualized) TFPQ ####

## TFPQ variation

# compute TFPQ with wedges 
firms_1997 <- firms_1997 %>% 
  mutate(
    log_TFPQ_wedges = log(routs) - alpha_gross*log(alpha_gross*(1-vat_tax)*routs/(R*(1+capital_wedge))) - beta_gross*log(beta_gross*(1-vat_tax)*routs/(1+labor_wedge)) - gamma_gross*log(gamma_gross*routs), 
    TFPQ_wedges = exp(log_TFPQ_wedges))

# now residualize TFPQ wedges
OLS_resid_TFPQ_wedges <- fixest::feols(
  fml = log_TFPQ_wedges ~ 1 | prov + state_owned_major + state_owned_partly + estyear_FE + industry_4digit, #
  data = firms_1997)

# get residualized TFPQ measure 
firms_1997$log_TFPQ_wedges_resid <- firms_1997$log_TFPQ_wedges - 
  predict(object = OLS_resid_TFPQ_wedges, newdata = firms_1997) + 
  mean(firms_1997$log_TFPQ_wedges, na.rm = T)
firms_1997$TFPQ_wedges_resid <- exp(firms_1997$log_TFPQ_wedges_resid)

#### Plot TFPQ-wedges quantile ratio & relative distributions ####

# TFPQ_wedges distribution 
plot_TFPQ_wedges_distrib <- firms_1997 %>% 
  mutate(
    connections = fct_rev(as.factor(connected)), 
    connected = case_when(
      connected == "connected" ~ "C", 
      connected == "non-connected" ~ "NC")) %>% 
  ggplot(aes(y = connected, group = connected, fill = connected)) + 
  geom_density_ridges(
    aes(x = log_TFPQ_wedges_resid, # divide by 9040 to show in 2010 USD
        fill = connections),
    alpha = 0.8, color = "black", quantile_lines = TRUE, quantiles = 2, 
    show.legend = TRUE, bandwidth = 0.075) + 
  labs(
    x = "log TFPQ",
    y = ""
  ) + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_fill_discrete(name = "Type:") + 
  theme(legend.position = "bottom")

### Right panel: TFPQ_wedges_QR distribution implied by resid TFPQ_wedges 

# Get baseline TFPQ_wedges_QR function
compute_baseline_TFPQ_wedges_QR <- function(data_connect,data_nonconnect, cutoff){
  
  # Arrange from low to high
  data_connect <- data_connect %>%
    arrange(TFPQ_wedges_resid)
  
  ## First get restricted productivity distribution
  
  ### Need to have specified cutoff! ### 
  
  # Now restrict normally 
  restricted_productivity_distribution <- data_nonconnect %>% 
    # remove NAs 
    filter(!is.na(TFPQ_wedges_resid)) %>% 
    # only look at TFPQ_wedges
    select(c(TFPQ_wedges_resid)) %>% 
    # order by TFPQ_wedges
    arrange(TFPQ_wedges_resid) %>% 
    # drop cutoff*% of observations
    dplyr::slice(round(cutoff*n()):n())
  
  restricted_productivity_distribution <- restricted_productivity_distribution$TFPQ_wedges_resid
  
  # Now get quantiles (weighted by rho if supplied)
  z_connected <- unname(quantile(
    x = restricted_productivity_distribution,
    probs = c(1:nrow_connected)/(nrow_connected+1),
  ))
  TFPQ_wedges_QR_connected <- data_connect$TFPQ_wedges_resid/z_connected - 1 
  
  # return results
  return(list(productivity = z_connected, TFPQ_wedges_QR = TFPQ_wedges_QR_connected))
}

# Get bootstrapping TFPQ_wedges_QR function 
compute_bs_baseline_TFPQ_wedges_QR <- function(data_connect,data_nonconnect, cutoff){
  
  # create productivity variable
  data_connect$productivity <- NA
  
  # Arrange from high to low
  data_connect <- data_connect %>%
    arrange(desc(TFPQ_wedges_resid))
  
  ## First get restricted productivity distribution
  
  # Now restrict normally 
  restricted_productivity_distribution <- data_nonconnect %>%
    # remove NAs 
    filter(!is.na(TFPQ_wedges_resid)) %>% 
    # only look at TFPR
    select(c(TFPQ_wedges_resid)) %>% 
    # order by TFPR
    arrange(TFPQ_wedges_resid) %>% 
    # drop cutoff*% of observations
    dplyr::slice(round(cutoff*n()):n())
  
  ## Now, draw with replacement from restricted productivity distribution
  
  n_draws <- nrow(data_connect)
  
  # set seeds
  set.seed(123456)
  n_bootstraps = 10000
  bootstrap_seeds = rnorm(n = n_bootstraps, mean = 1000, sd = 100)
  
  # Get sampling function 
  take_sample <- Vectorize(function(data,seed,n_draws){
    set.seed(seed)
    return(sort(sample(x = data, size = n_draws, replace = TRUE), decreasing = T))
  }, vectorize.args = c("seed"))
  
  # bootstrap samples
  bs_productivities <- take_sample(data = restricted_productivity_distribution$TFPQ_wedges_resid, 
                                   seed = bootstrap_seeds, n_draws = n_draws)
  
  bs_TFPQ_wedges_QR <- data_connect$TFPQ_wedges_resid/bs_productivities - 1
  
  # return results
  return(bs_TFPQ_wedges_QR)
}

# baseline TFPQ_wedges_QR (without fixed cost)
baseline_TFPQ_wedges_QR_noF <- compute_baseline_TFPQ_wedges_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = 0.0)

baseline_TFPQ_wedges_QR_noF_bs <- compute_bs_baseline_TFPQ_wedges_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = 0.0)

# baseline TFPQ_wedges_QR (max fixed cost)
max_cutoff_wedges <- (ecdf(firms_1997$TFPQ_wedges_resid[firms_1997$connected == "non-connected"]))(
  min(firms_1997$TFPQ_wedges_resid[firms_1997$connected == "connected"])
)

baseline_TFPQ_wedges_QR_maxF <- compute_baseline_TFPQ_wedges_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = max_cutoff_wedges)

baseline_TFPQ_wedges_QR_maxF_bs <- compute_bs_baseline_TFPQ_wedges_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = max_cutoff)

## Summarize bootstrapped confidence bands  
CI_baseline_TFPQ_wedges_QR_bs_upper <- tibble(
  productivity_order = c(241:1),
  `2.5%` = t(apply(baseline_TFPQ_wedges_QR_noF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,1],
  `97.5%` = t(apply(baseline_TFPQ_wedges_QR_noF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,2]
) %>% pivot_longer(cols = c(`2.5%`,`97.5%`), names_to = "Percentiles", values_to = "TFPQ_wedges_QR")

CI_baseline_TFPQ_wedges_QR_bs_lower <- tibble(
  productivity_order = c(241:1),
  `2.5%` = t(apply(baseline_TFPQ_wedges_QR_maxF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,1],
  `97.5%` = t(apply(baseline_TFPQ_wedges_QR_maxF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,2]
) %>% pivot_longer(cols = c(`2.5%`,`97.5%`), names_to = "Percentiles", values_to = "TFPQ_wedges_QR")


# Plot estimated TFPQ_wedges_QR schedule  
plot_baseline_TFPQ_wedges_QR_bound <- ggplot() + 
  geom_line(data = CI_baseline_TFPQ_wedges_QR_bs_upper, aes(x = (productivity_order/241)*100, y = TFPQ_wedges_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_line(data = CI_baseline_TFPQ_wedges_QR_bs_lower, aes(x = (productivity_order/241)*100, y = TFPQ_wedges_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_line(aes(x = (c(1:241)/241)*100, y = baseline_TFPQ_wedges_QR_noF$TFPQ_wedges_QR, color = "Upper"), linewidth = 1) + 
  geom_line(aes(x = (c(1:241)/241)*100, y = baseline_TFPQ_wedges_QR_maxF$TFPQ_wedges_QR, color = "Lower"), linewidth = 1) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "TFPQ Percentile") + 
  scale_color_discrete(name = "Bound:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom")

## joint plot 
ggarrange(
  plot_TFPQ_wedges_distrib + labs(caption="(A) TFPQ distributions C vs NC") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_baseline_TFPQ_wedges_QR_bound + labs(caption="(B) TFPQ quantile ratio (with bounds)") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  

#### Step 3: measure & plot z_tilde variation ####

## Check that TFPQ is constructed correctly with wedges! 

# Construct z_tilde wedges (check the math for this!!)
firms_1997 <- firms_1997 %>% 
  mutate(
    x_star_wedges = ((((1-vat_tax)*alpha_gross)/((1+capital_wedge)*R))^alpha_gross)*((((1-vat_tax)*beta_gross/(1+labor_wedge)))^beta_gross)*(gamma_gross^gamma_gross), 
    z_tilde_wedges = (TFPQ_wedges_resid*x_star_wedges)^(1/(1-eta_tilde_baseline))
  )

# z_tilde_wedges distribution 
plot_z_tilde_wedges_distrib <- firms_1997 %>% 
  mutate(
    connections = fct_rev(as.factor(connected)), 
    connected = case_when(
      connected == "connected" ~ "C", 
      connected == "non-connected" ~ "NC")) %>% 
  ggplot(aes(y = connected, group = connected, fill = connected)) + 
  geom_density_ridges(
    aes(x = log(z_tilde_wedges), # divide by 9040 to show in 2010 USD
        fill = connections),
    alpha = 0.8, color = "black", quantile_lines = TRUE, quantiles = 2, 
    show.legend = TRUE, bandwidth = 0.4) + 
  labs(
    x = "log z tilde",
    y = ""
  ) + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_fill_discrete(name = "Type:") + 
  theme(legend.position = "bottom")

### Right panel: z_tilde_wedges_QR distribution implied by resid z_tilde_wedges 

# Get baseline z_tilde_wedges_QR function
compute_baseline_z_tilde_wedges_QR <- function(data_connect,data_nonconnect, cutoff){
  
  # Arrange from low to high
  data_connect <- data_connect %>%
    arrange(z_tilde_wedges)
  
  ## First get restricted productivity distribution
  
  ### Need to have specified cutoff! ### 
  
  # Now restrict normally 
  restricted_productivity_distribution <- data_nonconnect %>% 
    # remove NAs 
    filter(!is.na(z_tilde_wedges)) %>% 
    # only look at z_tilde_wedges
    select(c(z_tilde_wedges)) %>% 
    # order by z_tilde_wedges
    arrange(z_tilde_wedges) %>% 
    # drop cutoff*% of observations
    dplyr::slice(round(cutoff*n()):n())
  
  restricted_productivity_distribution <- restricted_productivity_distribution$z_tilde_wedges
  
  # Now get quantiles (weighted by rho if supplied)
  z_connected <- unname(quantile(
    x = restricted_productivity_distribution,
    probs = c(1:nrow_connected)/(nrow_connected+1),
  ))
  z_tilde_wedges_QR_connected <- data_connect$z_tilde_wedges/z_connected - 1 
  
  # return results
  return(list(productivity = z_connected, z_tilde_wedges_QR = z_tilde_wedges_QR_connected))
}

# Get bootstrapping z_tilde_wedges_QR function 
compute_bs_baseline_z_tilde_wedges_QR <- function(data_connect,data_nonconnect, cutoff){
  
  # create productivity variable
  data_connect$productivity <- NA
  
  # Arrange from high to low
  data_connect <- data_connect %>%
    arrange(desc(z_tilde_wedges))
  
  ## First get restricted productivity distribution
  
  # Now restrict normally 
  restricted_productivity_distribution <- data_nonconnect %>%
    # remove NAs 
    filter(!is.na(z_tilde_wedges)) %>% 
    # only look at TFPR
    select(c(z_tilde_wedges)) %>% 
    # order by TFPR
    arrange(z_tilde_wedges) %>% 
    # drop cutoff*% of observations
    dplyr::slice(round(cutoff*n()):n())
  
  ## Now, draw with replacement from restricted productivity distribution
  
  n_draws <- nrow(data_connect)
  
  # set seeds
  set.seed(123456)
  n_bootstraps = 10000
  bootstrap_seeds = rnorm(n = n_bootstraps, mean = 1000, sd = 100)
  
  # Get sampling function 
  take_sample <- Vectorize(function(data,seed,n_draws){
    set.seed(seed)
    return(sort(sample(x = data, size = n_draws, replace = TRUE), decreasing = T))
  }, vectorize.args = c("seed"))
  
  # bootstrap samples
  bs_productivities <- take_sample(data = restricted_productivity_distribution$z_tilde_wedges, 
                                   seed = bootstrap_seeds, n_draws = n_draws)
  
  bs_z_tilde_wedges_QR <- data_connect$z_tilde_wedges/bs_productivities - 1
  
  # return results
  return(bs_z_tilde_wedges_QR)
}

# baseline z_tilde_wedges_QR (without fixed cost)
baseline_z_tilde_wedges_QR_noF <- compute_baseline_z_tilde_wedges_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = 0.0)

baseline_z_tilde_wedges_QR_noF_bs <- compute_bs_baseline_z_tilde_wedges_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = 0.0)

## Summarize bootstrapped confidence bands  
CI_baseline_z_tilde_wedges_QR_bs_upper <- tibble(
  productivity_order = c(241:1),
  `2.5%` = t(apply(baseline_z_tilde_wedges_QR_noF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,1],
  `97.5%` = t(apply(baseline_z_tilde_wedges_QR_noF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,2]
) %>% pivot_longer(cols = c(`2.5%`,`97.5%`), names_to = "Percentiles", values_to = "z_tilde_wedges_QR")

# Plot estimated z_tilde_wedges_QR schedule  
plot_baseline_z_tilde_wedges_QR <- ggplot() + 
  geom_line(data = CI_baseline_z_tilde_wedges_QR_bs_upper, aes(x = (productivity_order/241)*100, y = z_tilde_wedges_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_line(aes(x = (c(1:241)/241)*100, y = baseline_z_tilde_wedges_QR_noF$z_tilde_wedges_QR, color = "Upper"), linewidth = 1) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "z tilde Quantile Ratio") + 
  xlab(label = "z tilde Percentile") + 
  scale_color_discrete(name = "Bound:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "none")

plot_baseline_z_tilde_wedges_QR

## joint plot 
ggarrange(
  plot_z_tilde_wedges_distrib + labs(caption="(A) Z tilde distributions C vs NC") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_baseline_z_tilde_wedges_QR + labs(caption="(B) Z tilde quantile ratio") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  

#### Show distribution of wedges! Correlate them with other measures, e.g. TFPQ & z_tilde #### 

### TFPQ - capital wedge correlation ### 
plot_TFPQ_capital_wedges <- ggplot() +
  geom_point(aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "non-connected"], 
                 y = (1+firms_1997$capital_wedge[firms_1997$connected == "non-connected"])^(alpha_gross), 
                 color = "non-connected", group = "non-connected"), 
             size = 0.1, alpha = 0.2) + 
  geom_smooth(method='lm', formula= y~x, 
              aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "non-connected"], 
                  y = (1+firms_1997$capital_wedge[firms_1997$connected == "non-connected"])^(alpha_gross), 
                  color = "non-connected", group = "non-connected")) + 
  geom_point(aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "connected"], 
                 y = (1+firms_1997$capital_wedge[firms_1997$connected == "connected"])^(alpha_gross), 
                 color = "connected", group = "connected"), 
             size = 0.5, alpha = 0.75) + 
  geom_smooth(method='lm', formula= y~x, 
              aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "connected"], 
                  y = (1+firms_1997$capital_wedge[firms_1997$connected == "connected"])^(alpha_gross), 
                  color = "connected", group = "connected")) + 
  ylab("Effective capital wedge") + 
  xlab("(measured) log TFPQ") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_color_discrete(name = "Type:") + 
  theme(legend.position = "bottom") + 
  ylim(0.836,1.21)
  

### TFPQ - labor wedge correlation ### 
plot_TFPQ_labor_wedges <- ggplot() +
  geom_point(aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "non-connected"], 
                 y = (1+firms_1997$labor_wedge[firms_1997$connected == "non-connected"])^(beta_gross), 
                 color = "non-connected", group = "non-connected"), 
             size = 0.1, alpha = 0.2) + 
  geom_smooth(method='lm', formula= y~x, 
              aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "non-connected"], 
                  y = (1+firms_1997$labor_wedge[firms_1997$connected == "non-connected"])^(beta_gross), 
                  color = "non-connected", group = "non-connected")) + 
  geom_point(aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "connected"], 
                 y = (1+firms_1997$labor_wedge[firms_1997$connected == "connected"])^(beta_gross), 
                 color = "connected", group = "connected"), 
             size = 0.5, alpha = 0.75) + 
  geom_smooth(method='lm', formula= y~x, 
              aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "connected"], 
                  y = (1+firms_1997$labor_wedge[firms_1997$connected == "connected"])^(beta_gross), 
                  color = "connected", group = "connected")) + 
  ylab("Effective labor wedge") + 
  xlab("(measured) TFPQ") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_color_discrete(name = "Type:") + 
  theme(legend.position = "bottom") + 
  ylim(0.8719,1.1491)

# So "wedges" are increasing in TFPQ. 
# Conditional on TFPQ, looks pretty similar for connected firms 
# Higher wedges of connected firms mostly explained by higher (measured) TFPQ 
# Conditional on TFPQ, connected firms tend to face slightly LOWER taxes. 

## Create joint plot (for Appendix) 
ggarrange(
  plot_TFPQ_labor_wedges + labs(caption="(A) Correlation: labor wedges and TFPQ") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_TFPQ_capital_wedges + labs(caption="(B) Correlation: capital wedges and TFPQ") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  
ggsave(width = 12, height = 5, dpi = 200,
       "Paper writing/Revision JPE Macro/Final paper/Paper/Figures/Figure_appendix_labor_capital_wedges_TFPQ.png")


## Show summary plot (with summary of wedges) 
firms_1997_wedges <- firms_1997 %>% 
  mutate(
    joint_effective_wedge = ((1+capital_wedge)^alpha_gross)*((1+labor_wedge)^beta_gross)
  ) %>% 
  filter(!is.na(joint_effective_wedge)) # drop single observation with missing labor wedge 

plot_joint_wedges_TFPQ_cor <- ggplot() +
  geom_point(aes(x = firms_1997_wedges$log_TFPQ_wedges_resid[firms_1997_wedges$connected == "non-connected"], 
                 y = (firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "non-connected"]), 
                 color = "non-connected", group = "non-connected"), 
             size = 0.1, alpha = 0.2) + 
  geom_smooth(method='lm', formula= y~x, 
              aes(x = firms_1997_wedges$log_TFPQ_wedges_resid[firms_1997_wedges$connected == "non-connected"], 
                  y = (firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "non-connected"]), 
                  color = "non-connected", group = "non-connected")) + 
  geom_point(aes(x = firms_1997_wedges$log_TFPQ_wedges_resid[firms_1997_wedges$connected == "connected"], 
                 y = (firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "connected"]), 
                 color = "connected", group = "connected"), 
             size = 0.5, alpha = 0.75) + 
  geom_smooth(method='lm', formula= y~x, 
              aes(x = firms_1997_wedges$log_TFPQ_wedges_resid[firms_1997_wedges$connected == "connected"], 
                  y = (firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "connected"]), 
                  color = "connected", group = "connected")) + 
  ylab("Effective total wedge") + 
  xlab("log TFPQ") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_color_discrete(name = "Type:") + 
  theme(legend.position = "bottom") + 
  ylim(0.729,1.379)

#### Save joint plot for main paper ####

ggarrange(
  plot_joint_wedges_TFPQ_cor + labs(caption="(A) Correlation: wedges and TFPQ") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_baseline_TFPQ_wedges_QR_bound + labs(caption="(B) TFPQ quantile ratio (with bounds)") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  
# ggsave(width = 12, height = 5, dpi = 200,
#        "Paper writing/Revision JPE Macro/paper revision round 2/Figures/Figure_main_wedge_extension.png") 

#### Estimate wedge-related parameters that can be directly estimated ####

# Means: C >> NC (but driven by higher TFPQ)
mean_tau_C <- mean(log(firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "connected"]))
mean_tau_NC <- mean(log(firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "non-connected"]))

# Variances: C > NC (also driven by higher TFPQ?) 
var_tau_C <- var(log(firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "connected"]))
var_tau_NC <- var(log(firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "non-connected"]))

# Correlation of wedges and productivity for non-connected: 
lm_cor_wedges_TFPQ_NC <- lm(
  data = firms_1997_wedges[firms_1997_wedges$connected == "non-connected",],
  formula = log(joint_effective_wedge) ~ log_TFPQ_wedges_resid)
rho_NC <- lm_cor_wedges_TFPQ_NC$coefficients[["log_TFPQ_wedges_resid"]]/(sqrt(var_tau_NC)/sd(log(firms_1997_wedges$TFPQ_wedges_resid[firms_1997_wedges$connected == "non-connected"])))

# Correlation of wedges and productivity for connected (target this below!) 
lm_cor_wedges_TFPQ_C <- lm(
  data = firms_1997_wedges[firms_1997_wedges$connected == "connected",],
  formula = log(joint_effective_wedge) ~ log_TFPQ_wedges_resid)
beta_coef_TFPQ_wedges_C <- lm_cor_wedges_TFPQ_C$coefficients[["log_TFPQ_wedges_resid"]]

###### Estimate Connections Technology with wedges ######

#### Prepare objects & set grid length ####

## To speed up estimation: Reduce length for epsilon & wedges (can still keep z_star as long as before!)
grid_dense_length_wedges <- 100

# create z_tilde_function
z_tilde_wedges <- function(z_star,x_star){
  return( (z_star*x_star)^(1/(1-eta_tilde_baseline)) )
}

# targeted moment 
empirical_QR_wedges <- baseline_TFPQ_wedges_QR_noF$TFPQ_wedges_QR 

# Extract z_star_NC_wedges
z_star_NC_wedges <- firms_1997_wedges$TFPQ_wedges_resid[firms_1997_wedges$connected == "non-connected"]

# get parameters based on z_star 
mean(log(z_star_NC_wedges))
var(log(z_star_NC_wedges))

# get density distribution over z_star_NC_wedges
z_star_wedges_marginal_distrib <- approxfun(density(log(z_star_NC_wedges)))(log(z_star_NC_wedges))

# specify grid here!! Want to range over z_i for non-connected firms (don't need very many points!) 
z_star_wedges_grid <- seq(from = unname(quantile(z_star_NC_wedges,probs = 0.0)), 
                   to = unname(quantile(z_star_NC_wedges,probs = 1)), 
                   length.out = grid_length)

z_star_wedges_grid_dense <- exp(seq(from = min(log(z_star_NC_wedges)), 
                         to = max(log(z_star_NC_wedges)), 
                         length.out = 500)) # can still keep length of z long 

# get density distribution over z_star grids 
z_star_wedges_grid_marginal_distrib <- approxfun(density(z_star_NC_wedges))(z_star_wedges_grid)
z_star_wedges_grid_marginal_distrib <- z_star_wedges_grid_marginal_distrib/sum(z_star_wedges_grid_marginal_distrib)

z_star_wedges_grid_dense_marginal_distrib <- approxfun(density(z_star_NC_wedges))(z_star_wedges_grid_dense)
z_star_wedges_grid_dense_marginal_distrib <- z_star_wedges_grid_dense_marginal_distrib/sum(z_star_wedges_grid_dense_marginal_distrib)

# can export z_star_grid with distribution
write_csv(x = tibble(z_star = z_star_wedges_grid_dense, distrib = z_star_wedges_grid_dense_marginal_distrib), 
          file = "Computation/data/z_star_grid_wedges.csv")

# need this for later
lowest_TFPQ_connected <- min(firms_1997_wedges$TFPQ_wedges_resid[firms_1997_wedges$connected == "connected"])

### Create z_star vectors

# z_star grid dense long
z_star_wedges_grid_dense_long <- (crossing(
  z_star = z_star_wedges_grid_dense,
  epsilon = c(1:grid_dense_length_wedges), 
  wedge = c(1:grid_dense_length_wedges)))$z_star

# also make z star grid marginal distrib long
z_star_wedges_grid_dense_marginal_distrib_long <- approxfun(density(log(z_star_NC_wedges)))(log(z_star_wedges_grid_dense_long))

#### Function to get joint distribution of subsidies over (z*,eps,wedges) ####

# Function with 
compute_subsidy_DRS_eps_cost_wedges_model_joint <- function(parameters, n_sampling = 10000){
  
  print(parameters)
  
  ##### Step 1: Prepare joint epsilon, wedge & productivity grid #####
  
  ### get good range for epsilon (simulating min and max)
  
  # Maximum (already take into account that correlation will be negative!!) 
  set.seed(12345)
  min_z_star <- unname(quantile(x = z_star_NC_wedges, probs = c(0.025))) # take 2.5% percentile
  random_draw_eps_max <- rnorm(
    n = 50000, 
    mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(min_z_star),
    sd = sqrt(parameters["variance_eps_z"]))
  eps_max <- unname(quantile(x = random_draw_eps_max, probs = c(0.975))) # take 97.5% percentile 
  
  # Minimum 
  set.seed(12345)
  max_z_star <- unname(quantile(x = z_star_NC_wedges, probs = c(0.975))) # take 97.5% percentile
  random_draw_eps_min <- rnorm(
    n = 50000, 
    mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(max_z_star),
    sd = sqrt(parameters["variance_eps_z"]))
  eps_min <- max(parameters["cost_level"],unname(quantile(x = random_draw_eps_min, probs = c(0.025)))) # make sure that epsilon is not below c (otherwise, no rent-seeking)
  
  #
  if(eps_min > eps_max){
    print("Warning: eps min > eps max, will set eps max larger")
    eps_max <- 2*eps_min
  }
  
  ### get good range for wedge (simulating min and max)
  
  var_log_wedge_z <- (1 - parameters["corr_wedge_z"]^2)*var_tau_C
  sd_tau_C <- sqrt(var_tau_C)
  mean_log_z_star <- mean(log(z_star_NC_wedges))
  sd_log_z_star <- sd(log(z_star_NC_wedges))
  
  # Minimum (already take into account that correlation is positive!!) 
  set.seed(12345)
  random_draw_wedge_min <- rnorm(
    n = 50000, 
    mean = mean_tau_C + parameters["corr_wedge_z"]*(sd_tau_C/sd_log_z_star)*(log(min_z_star) - mean_log_z_star),
    sd = sqrt(var_log_wedge_z)) 
  wedge_min <- unname(quantile(x = random_draw_wedge_min, probs = c(0.025))) # take 2.5% percentile 
  
  # Maximum 
  set.seed(12345)
  random_draw_wedge_max <- rnorm(
    n = 50000, 
    mean = mean_tau_C + parameters["corr_wedge_z"]*(sd_tau_C/sd_log_z_star)*(log(max_z_star) - mean_log_z_star),
    sd = sqrt(var_log_wedge_z))
  wedge_max <- unname(quantile(x = random_draw_wedge_max, probs = c(0.975)))
  
  # 
  if(wedge_min > wedge_max){
    print("Warning: wedge min > wedge max, will set wedge max larger")
    wedge_max <- 2*wedge_min
  }
  
  ### Create joint grid 
  
  joint_grid <- crossing(
    z_star = z_star_wedges_grid_dense,
    epsilon = seq(
      from = eps_min,
      to = eps_max, 
      length.out = grid_dense_length_wedges),
    log_wedge = seq(
      from = wedge_min,
      to = wedge_max, 
      length.out = grid_dense_length_wedges))
  
  z_star_wedges_grid_dense_long <- joint_grid$z_star
  epsilon_grid_dense_long <- joint_grid$epsilon 
  wedge_grid_dense_long <- exp(joint_grid$log_wedge)
  
  print("Step 1 done.")
  
  ##### Step 2: Get probability distribution over states #####
  
  # Create df
  results_df <- tibble(
    z_star = z_star_wedges_grid_dense_long,
    epsilon = epsilon_grid_dense_long,
    wedge = wedge_grid_dense_long, 
    z_star_wedges_marginal_distrib = z_star_wedges_grid_dense_marginal_distrib_long, 
    epsilon_condit_distrib = dnorm(
      x = epsilon_grid_dense_long,
      mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(z_star_wedges_grid_dense_long),
      sd = sqrt(parameters["variance_eps_z"])), 
    wedge_condit_distrib = dnorm(
      x = log(wedge_grid_dense_long),
      mean = mean_tau_C + parameters["corr_wedge_z"]*(sd_tau_C/sd_log_z_star)*(log(z_star_wedges_grid_dense_long) - mean_log_z_star),
      sd = sqrt(var_log_wedge_z)) 
  )
  
  # Get joint density for each state
  results_df <- results_df %>% 
    group_by(z_star) %>% 
    mutate(
      # first normalize within z_star
      epsilon_condit_distrib = epsilon_condit_distrib/sum(epsilon_condit_distrib), 
      wedge_condit_distrib = wedge_condit_distrib/sum(wedge_condit_distrib),
      joint_condit_distrib = (epsilon_condit_distrib*wedge_condit_distrib)/sum(epsilon_condit_distrib*wedge_condit_distrib)
    ) %>% ungroup() %>% 
    mutate(
      joint_density = joint_condit_distrib*z_star_wedges_marginal_distrib, 
      joint_density = joint_density/sum(joint_density)
      # # then get joint density: product of marginal & condit distrib  
      # joint_density_epsilon = epsilon_condit_distrib*z_star_wedges_marginal_distrib,
      # joint_density_epsilon = joint_density_epsilon/sum(joint_density_epsilon),
      # # then get joint density: product of marginal & condit distrib  
      # joint_density_wedge = wedge_condit_distrib*z_star_wedges_marginal_distrib,
      # joint_density_wedge = joint_density_wedge/sum(joint_density_wedge), 
      # # Now combine for full joint density (simply multiply given independence)
      # joint_density = joint_density_epsilon*joint_density_wedge,
      # joint_density = joint_density/sum(joint_density)
    )
  
  ## Next, draw from joint densities to reduce dimensionality 
  
  # draw representative sub-sample from state space 
  set.seed(12345) 
  sampled_entries <- sample(
    x = c(1:nrow(results_df)), 
    size = n_sampling, replace = T, 
    prob = results_df$joint_density)
  
  # restrict results to sampled entries (to reduce dimensionality)
  results_df <- results_df[sampled_entries,]
  
  # now compute x_star & z_tilde
  results_df$x_star <- ( (((1-vat_tax)*alpha_gross/R)^alpha_gross)*(((1-vat_tax)*beta_gross/wage)^beta_gross)*((gamma_gross/P)^gamma_gross) )*(1/results_df$wedge)
  results_df$z_tilde <- z_tilde_wedges(
    z_star = results_df$z_star,  
    x_star = results_df$x_star)
  
  # make sure to save wedges 
  wedges <- results_df$wedge
  
  print("Step 2 done.")
  
  ##### Step 3: Fill (restricted) results df with optimal choices  #####

  # Directly estimate results for restricted results_df sample
  results_df <- compute_subsidy_DRS_eps_cost_model_grid(
    z_star_grid = results_df$z_star,
    epsilon_grid = results_df$epsilon,
    z_tilde_grid = results_df$z_tilde,
    parameters = parameters)
  
  # make sure to add wedges back in 
  results_df$wedge <- wedges 

  print("Step 3 done.")

  ##### Step 4: Get results & return #####

  # restrict to firms that would choose to become connected
  connected_df <- results_df[results_df$choose_C,]

  # Build quantiles of TFPR distribution of connected firms in data
  connected_df <- connected_df %>% arrange(TFPQ_model)

  quantiles_connected_TFPQ_model <- unname(quantile(
    x = connected_df$TFPQ_model,
    probs = c(1:nrow_connected)/(nrow_connected+1)
  ))

  # Compute Quantile ratio in Model
  model_QR <- (compute_baseline_TFPQ_wedges_QR(
    data_connect = tibble(
      index = c(1:241),
      TFPQ_wedges_resid = quantiles_connected_TFPQ_model),
    data_nonconnect = results_df %>% rename(TFPQ_wedges_resid = z_star),
    cutoff = 0.0))$TFPQ_wedges_QR

  # evaluate objective over quantiles of TFPQ
  objective_QR_moment <- sum( (empirical_QR_wedges - model_QR)^2 )
  objective_QR_moment_norm <- objective_QR_moment/(sum( (mean(empirical_QR_wedges) - empirical_QR_wedges)^2 ))

  # Also compute implied probability of becoming connected
  proba_C <- sum(results_df$choose_C)/n_sampling
  
  # Compute coefficient for correlation of TFPQ and wedges 
  lm_cor_wedges_TFPQ_C_model <- lm(
    data = connected_df, 
    formula = log(wedge) ~ log(TFPQ_model)
  )
  beta_coef_TFPQ_wedges_C_model <- lm_cor_wedges_TFPQ_C_model$coefficients[["log(TFPQ_model)"]]

  # construct objective for beta coefficient 
  objective_beta_coef <- abs((beta_coef_TFPQ_wedges_C - beta_coef_TFPQ_wedges_C_model)/beta_coef_TFPQ_wedges_C)

  # return
  return(list(objective_QR = objective_QR_moment_norm,
              objective_beta = objective_beta_coef, 
              objective = objective_QR_moment_norm + objective_beta_coef,
              results_df = results_df,
              quantiles_model = quantiles_connected_TFPQ_model,
              proba_C = proba_C
  ))
}

#### Find optimal parameters #### 

# start with initial guess 
guess_param_wedges <- c(
  "corr_wedge_z" = rho_NC,
  "fixed_cost" = 0.0, 
  "theta" = 0.2, 
  "alpha_eps" = 0.041, 
  "beta_eps" = -0.011, 
  "variance_eps_z" = 4.656e-07, 
  "cost_level" = 1e-08, 
  "cost_elasticity" = 1.04)

# check that get results given guess 
test_DRS_wedges <- compute_subsidy_DRS_eps_cost_wedges_model_joint(parameters = guess_param_wedges, n_sampling = 10000)

## Objective(s) seem to work! 
test_DRS_wedges$objective
test_DRS_wedges$objective_beta
test_DRS_wedges$objective_QR

# optimal subsidies
summary(test_DRS_wedges$results_df$optimal_subsidy)

# test that marginal distribution of z* is indeed the same!! Yes! 
ggplot() + 
  geom_density(aes(x = log(test_DRS_wedges$results_df$z_star), color = "Model")) + 
  geom_density(aes(x = log(z_star_NC_wedges), color = "Data"))

# start with good guess (found from running many different coarser optimizations before)
initial_parameter_wedges <- c(
  "corr_wedge_z" = 0.795,
  "alpha_eps" = 0.0744, 
  "beta_eps" = -0.01287, 
  "variance_eps_z" = 4.7726e-07, 
  "cost_level" = 1.023e-08)

## Start optim parallel (fixes elasticities, they were already found based on grid search)
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_wedges <- optimParallel( 
  par = initial_parameter_wedges/initial_parameter_wedges, 
  fn = function(par){
    parameters <- par*initial_parameter_wedges
    
    # check parameter combinations 
    if(mean(parameters["alpha_eps"] + parameters["beta_eps"]*log(z_star_NC_wedges)) < 0.01){
      return(1e12*(1/parameters["alpha_eps"]))
    } else{
      results <- compute_subsidy_DRS_eps_cost_wedges_model_joint(
        parameters = c(parameters,"fixed_cost" = 0.0,
                       "theta" = 0.2,"cost_elasticity" = 1.04), 
        n_sampling = 10000)
      print(paste("Objective is: ", results$objective))
      return(results$objective)
    }
  },
  method = "L-BFGS-B",
  lower = c(0.75,0.5,0.5,0.5,0.5), # 0.75,
  upper = c(1.175,2.0,2.0,2.0,2.0) # 1.01,
)

stopCluster(cl) 

optimal_wedges$par*initial_parameter_wedges
optimal_wedges$value # Overall objective = 0.197 (R2 = 0.803)
optimal_wedges$message # Converged

# solve for correlation rho: -0.999
ratio_beta_sigma_wedges <- ((optimal_wedges$par*initial_parameter_wedges)["beta_eps"])/sqrt((optimal_wedges$par*initial_parameter_wedges)["variance_eps_z"])
estimate_rho_wedges <- ratio_beta_sigma_wedges/sqrt((ratio_beta_sigma_wedges)^2 + 1)

# save parameters 
parameters_wedges <- c(
  optimal_wedges$par*initial_parameter_wedges,
  "fixed_cost" = 0.0, 
  "theta" = 0.2, 
  "cost_elasticity" = 1.04, 
  "proba_c" = nrow_connected/(nrow_connected+nrow_non_connected), 
  "rho" = unname(estimate_rho_wedges)
)

# also save as dataframe (to read in Julia)
parameters_df_wedges <- tibble(
  "corr_wedge_z" = (optimal_wedges$par*initial_parameter_wedges)[[1]],
  "alpha_eps" = (optimal_wedges$par*initial_parameter_wedges)[[2]],
  "beta_eps" = (optimal_wedges$par*initial_parameter_wedges)[[3]],
  "variance_eps_z" = (optimal_wedges$par*initial_parameter_wedges)[[4]],
  "cost_level" = (optimal_wedges$par*initial_parameter_wedges)[[5]],
  "fixed_cost" = 0.0, 
  "theta" = 0.2, 
  "cost_elasticity" = 1.04, 
  "proba_c" = nrow_connected/(nrow_connected+nrow_non_connected), 
  "rho" = unname(estimate_rho_wedges),
  "mean_tau_C" = mean_tau_C, 
  "mean_tau_NC" = mean_tau_NC, 
  "sd_tau_C" = sqrt(var_tau_C),
  "sd_tau_NC" = sqrt(var_tau_NC),
  "mean_log_z_star" = mean(log(z_star_NC_wedges)),
  "sd_log_z_star" = sd(log(z_star_NC_wedges)),
  "corr_wedge_z_NC" = rho_NC,
  "corr_wedge_z_C" = (optimal_wedges$par*initial_parameter_wedges)[[1]]
)
write_csv(x = parameters_df_wedges, file = "Computation/data/parameters_wedges.csv")

# now find these results 
results_optimal_wedges <- compute_subsidy_DRS_eps_cost_wedges_model_joint(
  parameters = c(optimal_wedges$par*initial_parameter_wedges,
                 "fixed_cost" = 0.0, "theta" = 0.2, "cost_elasticity" = 1.04), 
  n_sampling = 50000)

results_optimal_wedges$objective_beta # Only deviates by 6.7%
results_optimal_wedges$objective_QR # R2 on QR is at 84%


#### Save joint plot for main paper (Figure 6) ####

# compute TFPQ quantile ratio in model 
model_wedges_TFPQ_QR_noF <- compute_baseline_TFPQ_wedges_QR(
  data_connect = tibble(
    index = c(1:241),
    TFPQ_wedges_resid = results_optimal_wedges$quantiles_model), 
  data_nonconnect = results_optimal_wedges$results_df %>% 
    rename(TFPQ_wedges_resid = z_star), 
  cutoff = 0.0)

# Plot estimated TFPQ QR schedule  
plot_main_TFPQ_wedges_QR_fit <- ggplot() + 
  geom_line(data = CI_baseline_TFPQ_wedges_QR_bs_upper, aes(x = (productivity_order/241)*100, y = TFPQ_wedges_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_point(aes(x = (c(1:241)/241)*100, y = baseline_TFPQ_wedges_QR_noF$TFPQ_wedges_QR, color = "Data"), shape = 4) + 
  geom_line(aes(x = (c(1:241)/241)*100, y = model_wedges_TFPQ_QR_noF$TFPQ_wedges_QR, color = "Model"), linewidth = 1.0) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "TFPQ Percentiles") + 
  scale_color_manual(values = c("#e74c3c","#3498db"), name = "Type:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom")

## Create joint plot with results (incl estimation fit) 
ggarrange(
  plot_joint_wedges_TFPQ_cor + labs(caption="(A) Correlation: wedges and TFPQ") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_main_TFPQ_wedges_QR_fit + labs(caption="(B) TFPQ quantile ratio") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)
ggsave(width = 12, height = 5, dpi = 200,
       "Paper writing/Revision JPE Macro/Final paper/Publication Figures/Figure6_wedge_extension.pdf") 

###### Remaining estimation ######
#### Relative labor and capital wedges conditional on total wedges ####

# Test: Do relative labor and capital wedges correlate with productivity conditional on total wedge?

firms_1997_wedges <- firms_1997_wedges %>% 
  mutate(
    capital_wedge_incl = (1+capital_wedge)^alpha_gross, 
    labor_wedge_incl = (1+labor_wedge)^beta_gross,
    capital_wedge_share = log(capital_wedge_incl)/(log(capital_wedge_incl) + log(labor_wedge_incl)),
    labor_wedge_share = 1 - capital_wedge_share, 
    FE_total_wedge = ntile(x = log(joint_effective_wedge), n = 250)
  )

### Test: Look at shares and see if they systematically vary with total wedge & TFPQ 

# No, capital wedge share does not vary with total wedge (effect is very small; not significant if dont restrict)
summary(fixest::feols(
  data = firms_1997_wedges %>% filter(connected == "non-connected" & capital_wedge_share >= -5 & capital_wedge_share <= 5),
  fml = capital_wedge_share ~ log(joint_effective_wedge)))
# Why is effect small? One unit increase in log(joint_effective_wedge) gives 24pp higher capital share, but IQR of log(joint_effective_wedge) is only 0.22, so IQR gives 0.05pp (which is small)

# No correlation with TFPQ conditional on total wedge
summary(fixest::feols(
  data = firms_1997_wedges %>% filter(connected == "non-connected"  & capital_wedge_share >= 0 & capital_wedge_share <= 1),
  fml = capital_wedge_share ~ log_TFPQ_wedges_resid | FE_total_wedge))

## Conclusion: Warranted to just randomly draw relative labor/capital wedge given total wedge 

## Before exporting winsorize 
capital_wedge_share_NC <- firms_1997_wedges$capital_wedge_share[firms_1997_wedges$connected == "non-connected"]
capital_wedge_share_NC[capital_wedge_share_NC < quantile(x = capital_wedge_share_NC, probs = 0.05)] <- unname(quantile(x = capital_wedge_share_NC, probs = 0.05))
capital_wedge_share_NC[capital_wedge_share_NC > quantile(x = capital_wedge_share_NC, probs = 0.95)] <- unname(quantile(x = capital_wedge_share_NC, probs = 0.95))

capital_wedge_share_C <- firms_1997_wedges$capital_wedge_share[firms_1997_wedges$connected == "connected"]
capital_wedge_share_C[capital_wedge_share_C < quantile(x = capital_wedge_share_C, probs = 0.05)] <- unname(quantile(x = capital_wedge_share_C, probs = 0.05))
capital_wedge_share_C[capital_wedge_share_C > quantile(x = capital_wedge_share_C, probs = 0.95)] <- unname(quantile(x = capital_wedge_share_C, probs = 0.95))

# export relative capital share
write_csv(x = tibble(capital_wedge_share = capital_wedge_share_NC), file = "Computation/data/capital_wedge_share_NC.csv")
write_csv(x = tibble(capital_wedge_share = capital_wedge_share_C), file = "Computation/data/capital_wedge_share_C.csv")


#######################################################
###### Section 5.2 Extension: Production Network ###### 
#######################################################

####### Descriptives on Connections & TFPQ-QR by industry #######

# For this section, work with new dataset
firms_industry <- firms_1997

#### Descriptives on industry prevalence #### 

### Industry classification is ISIC Rev 2 (1968) 

# Get overview of connected firms by 2digit industry: 2 are below 10 firms 
firms_industry %>% group_by(industry_2digit) %>% 
  summarise(
    NC_nr = sum(connected == "non-connected"),
    C_nr = sum(connected == "connected"),
    C_share = sum(connected == "connected")/n())

# Get overview of connected firms by 3digit industry: 5 have zero, 22 below 10 firms 
firms_industry %>% group_by(industry_3digit) %>% 
  summarise(
    NC_nr = sum(connected == "non-connected"),
    C_nr = sum(connected == "connected"),
    C_share = sum(connected == "connected")/n()) %>% 
  print(n = 50)

# Summarise at broad 2-digit industry level (taken from ISIC Rev 2)
firms_industry <- firms_industry %>% mutate(
  industry_2digit_broad = case_when(
    industry_2digit == 31 ~ "Food, Beverages & Tobacco",
    industry_2digit == 32 ~ "Textiles", 
    industry_2digit %in% c(33,34) ~ "Wood & Paper",
    industry_2digit == 35 ~ "Chemicals", 
    industry_2digit == 36 ~ "Non-metallic minerals",
    industry_2digit %in% c(37,38) ~ "Metals & Machinery", 
    industry_2digit == 39 ~ "Other" # Will drop this when showing estimates 
  ))

## What is in "other"? Bunch of different things, probably best to drop it entirely! 
firms_industry %>% filter(industry_3digit == 390) %>% 
  group_by(industry_4digit) %>% 
  summarise(
    NC_nr = sum(connected == "non-connected"),
    C_nr = sum(connected == "connected"),
    C_share = sum(connected == "connected")/n()) %>% 
  print(n = 50)

## Which industries are particularly common for connected firms? Which are uncommon? 
firms_industry %>% 
  mutate(
    total_VA = sum(rvads), 
    total_gross = sum(routs)) %>% 
  group_by(industry_2digit_broad) %>% 
  summarise(
    VA_share = sum(rvads)/first(total_VA),
    domar_weight = sum(routs)/first(total_VA),
    output_share = sum(routs)/first(total_gross),
    total_firms = n(), 
    NC_nr = sum(connected == "non-connected"),
    C_nr = sum(connected == "connected"),
    C_share = sum(connected == "connected")/n()) %>% 
  arrange(C_share)
## Uncommon: Downstream industries such as Textiles, Other, Wood & Paper
## Common: Chemicals, Machinery 

#### TFPQ variation ####

### Get gross output elasticities 

# get gross elasticities at industry-level (directly take out VAT)
gross_elasticities_industry <- firms_industry %>% filter(connected == "non-connected") %>% 
  group_by(industry_2digit_broad) %>% 
  summarise(
    capital_elast = median(capital_share,na.rm = T)/(1-vat_tax),
    labor_elast = median(labor_share,na.rm = T)/(1-vat_tax),
    material_elast = median(intermediate_share,na.rm = T)) %>% 
  mutate(
    eta_tilde = capital_elast + labor_elast + material_elast, 
    sigma = 1/(1-eta_tilde)
         )
## Upstream industries seem to have much higher profit shares!
## Higher/lower elasticity of substitution in line with industry variation?
write_csv(x = gross_elasticities_industry, file = "Computation/data/gross_elasticities_industry.csv")

### Compute TFPQ 

# compute TFPQ using gross output first (assumes that P_j^m = 1!!)
firms_industry <- firms_industry %>% 
  left_join(x = ., y = gross_elasticities_industry) %>% 
  mutate(
    log_TFPQ_gross = log(routs) - capital_elast*log(capital_elast*(1-vat_tax)*routs/R) - labor_elast*log(labor_elast*(1-vat_tax)*routs) - material_elast*log(material_elast*routs), 
    TFPQ = exp(log_TFPQ_gross))

#### Residualize TFPQ ####

# initiate variables 
firms_industry$log_TFPQ_resid <- NA 
firms_industry$TFPQ_resid <- NA 

# loop through industries 
for(industry in unique(firms_industry$industry_2digit_broad)){
  
  # select right firms 
  selected_firms <- firms_industry$industry_2digit_broad == industry
  
  # residualize TFPQ 
  firms_industry$log_TFPQ_resid[selected_firms] <- firms_industry$log_TFPQ_gross[selected_firms] - 
    predict(
      object = fixest::feols(
        fml = log_TFPQ_gross ~ 1 | prov + state_owned_major + state_owned_partly + estyear_FE + industry_4digit,
        data = firms_industry[selected_firms,]), 
      newdata = firms_industry[selected_firms,]) + mean(firms_industry$log_TFPQ_gross[selected_firms])
  firms_industry$TFPQ_resid[selected_firms] = exp(firms_industry$log_TFPQ_resid[selected_firms])
  rm(selected_firms)
}

#### Industry-level TFPQ-QR #### 

## Output: TFPQ-QR for each industry with number of connected firms 

# initiate df to save TFPQ QR 
industry_TFPQ_QR_data <- tibble(
  TFPQ_QR = rep(NA,nrow_connected), 
  TFPQ_percentile = rep(NA,nrow_connected), 
  industry = rep(NA,nrow_connected), 
  TFPQ_NC = rep(NA,nrow_connected))
prev_rows <- 0 

# loop through industries 
for(industry in unique(firms_industry$industry_2digit_broad)){
  
  # select right firms & entries 
  selected_firms <- firms_industry$industry_2digit_broad == industry
  selected_rows_C <- c((prev_rows + 1):(prev_rows + sum(firms_industry$connected[selected_firms] == "connected")))
  
  # assign industry 
  industry_TFPQ_QR_data$industry[selected_rows_C] <- industry 
  
  # solve TFPQ-QR 
  TFPQ_QR_results <- compute_baseline_TFPQ_QR(
    data_connect = firms_industry[selected_firms,] %>% filter(connected == "connected"), 
    data_nonconnect = firms_industry[selected_firms,] %>% filter(connected == "non-connected"), 
    cutoff = 0.0)
  
  # assign results 
  industry_TFPQ_QR_data$TFPQ_QR[selected_rows_C] <- TFPQ_QR_results$TFPQ_QR
  industry_TFPQ_QR_data$TFPQ_percentile[selected_rows_C] <- (c(1:length(selected_rows_C))/(length(selected_rows_C) + 1))*100
  industry_TFPQ_QR_data$TFPQ_NC[selected_rows_C] <- TFPQ_QR_results$productivity
  
  # update for next round 
  rm(selected_firms)
  prev_rows <- max(selected_rows_C)
  rm(selected_rows_C)
  rm(TFPQ_QR_results)
}

# clean up 
rm(industry)
rm(prev_rows)

# Plot estimated TFPQ_QR schedule  
plot_TFPQ_QR_industry <- ggplot(
  data = industry_TFPQ_QR_data %>% 
    # make sure to drop "Other" industry, given only 2 connected firms 
    filter(industry != "Other")) + 
  geom_line(aes(x = TFPQ_percentile, y = TFPQ_QR, group = industry, color = industry), linewidth = 1) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "TFPQ Percentile") + 
  scale_color_discrete(name = "Industry:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom")

# maybe best way to show this? 
plot_TFPQ_QR_industry <- ggplot(
  data = industry_TFPQ_QR_data %>% 
    # make sure to drop "Other" industry, given only 2 connected firms 
    filter(industry != "Other") %>% 
    mutate(industry = case_when(
      industry == "Food, Beverages & Tobacco" ~ "Food",
      industry == "Wood & Paper" ~ "Wood", 
      industry == "Metals & Machinery" ~ "Machinery",
      industry == "Non-metallic minerals" ~ "Minerals",
      .default = industry))) + 
  geom_point(aes(x = log(TFPQ_NC), y = TFPQ_QR, group = industry, color = industry), shape = 4, size = 1) + 
  geom_line(aes(x = log(TFPQ_NC), y = TFPQ_QR, group = industry, color = industry), linewidth = 1, alpha = 0.5) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "(log) TFPQ | NC") + 
  scale_color_discrete(name = "Industry:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom")
# ggsave(width = 6, height = 4, "Paper writing/Revision JPE Macro/paper revision round 2/Figures/industry_TFPQ_QR_data.png")


####### Estimation by sector #######

get_estimation_objects_industry <- function(industry){
  
  #### Preparation ####
  
  ## restrict to right data 
  industry_data <- firms_industry %>% filter(industry_2digit_broad == industry)
  
  # eta tilde & revenue elasticities  
  eta_tilde = first(industry_data$eta_tilde)
  alpha_tilde = first(industry_data$capital_elast)
  beta_tilde = first(industry_data$labor_elast)
  gamma_tilde = first(industry_data$material_elast)
  
  # elasticity ratio 
  elasticity_ratio = eta_tilde/(1-eta_tilde)
  
  # number of connected firms 
  n_connect = nrow(industry_data %>% filter(connected == "connected"))
  share_connect = n_connect/nrow(industry_data) 
  
  # get overall share (need to exclude "other" industries)
  share_industry = nrow(industry_data)/nrow(firms_industry %>% filter(industry_2digit_broad != "Other"))
  
  #### Create remaining objects ####
  
  # x_star (needed to move between z_star to z_tilde) (DONT KNOW P_j^m!!!!) 
  x_star <- ( (((1-vat_tax)*alpha_tilde/R)^alpha_tilde)*(((1-vat_tax)*beta_tilde/wage)^beta_tilde)*((gamma_tilde/P)^gamma_tilde) )
  
  # create z_tilde_function
  z_tilde <- function(z_star){
    return( (z_star*x_star)^(1/(1-eta_tilde)) )
  }
  
  # Extract z_star_NC
  z_star_NC <- industry_data$TFPQ_resid[industry_data$connected == "non-connected"]
  
  # get density distribution over z_star_NC
  z_star_marginal_distrib <- approxfun(density(log(z_star_NC)))(log(z_star_NC))
  
  # get quantiles of non-connected 
  quantiles_non_connected_TFPQ_data <- unname(quantile(
    x = industry_data$TFPQ_resid[industry_data$connected == "non-connected"],
    probs = c(1:n_connect)/(n_connect+1),
  ))
  
  # specify z_star_grid 
  z_star_grid <- seq(from = unname(quantile(z_star_NC,probs = 0.0)), 
                              to = unname(quantile(z_star_NC,probs = 1)), 
                              length.out = grid_length)
  
  z_star_grid_dense <- seq(from = unname(quantile(z_star_NC,probs = 0.0)), 
                                    to = unname(quantile(z_star_NC,probs = 1)), 
                                    length.out = grid_dense_length)
  
  z_tilde_grid <- z_tilde(z_star = z_star_grid)
  
  z_tilde_grid_dense <- z_tilde(z_star = z_star_grid_dense)
  
  # get density distribution over z_star grids 
  z_star_grid_marginal_distrib <- approxfun(density(z_star_NC))(z_star_grid)
  z_star_grid_marginal_distrib <- z_star_grid_marginal_distrib/sum(z_star_grid_marginal_distrib)
  
  z_star_grid_dense_marginal_distrib <- approxfun(density(z_star_NC))(z_star_grid_dense)
  z_star_grid_dense_marginal_distrib <- z_star_grid_dense_marginal_distrib/sum(z_star_grid_dense_marginal_distrib)
  
  ## Now create long objects
  
  ### Create z_star vectors 
  
  # z_star grid long 
  z_star_grid_long <- (crossing(
    z_star = z_star_grid,
    epsilon = c(1:grid_length)))$z_star 
  
  # z_star grid dense long 
  z_star_grid_dense_long <- (crossing(
    z_star = z_star_grid_dense,
    epsilon = c(1:grid_dense_length)))$z_star 
  
  # also make z star grid marginal distrib long
  z_star_grid_dense_marginal_distrib_long <- approxfun(density(z_star_NC))(z_star_grid_dense_long)
  
  # can create z_tilde outside the loop
  z_tilde_grid_long <- z_tilde(z_star = z_star_grid_long)
  z_tilde_grid_dense_long <- z_tilde(z_star = z_star_grid_dense_long)
  
  # can create revenue & profits NC outside the loop 
  revenue_NC_z_star_grid_long <- z_tilde_grid_long
  profits_NC_z_star_grid_long <- (1-profit_tax)*(1-eta_tilde)*z_tilde_grid_long
  
  revenue_NC_z_star_grid_dense_long <- z_tilde_grid_dense_long
  profits_NC_z_star_grid_dense_long <- (1-profit_tax)*(1-eta_tilde)*z_tilde_grid_dense_long
  
  #### Finally, get empirical QR ####
  
  # TFPQ_QR (without fixed cost)
  TFPQ_QR_noF <- compute_baseline_TFPQ_QR(
    data_connect = industry_data[industry_data$connected == "connected",], 
    data_nonconnect = industry_data[industry_data$connected == "non-connected",], 
    cutoff = 0.0)
  
  empirical_QR <- TFPQ_QR_noF$TFPQ_QR
  
  
  #### Return results #### 
  return(list(
    alpha_tilde = alpha_tilde,
    beta_tilde = beta_tilde,
    gamma_tilde = gamma_tilde,
    eta_tilde = eta_tilde, 
    elasticity_ratio = elasticity_ratio,
    n_connect = n_connect,
    share_connect = share_connect,
    share_industry = share_industry,
    z_star_NC = z_star_NC, 
    z_star_grid_dense = z_star_grid_dense, 
    z_star_grid_dense_long = z_star_grid_dense_long, 
    z_tilde_grid_dense_long = z_tilde_grid_dense_long, 
    z_star_grid_dense_marginal_distrib = z_star_grid_dense_marginal_distrib, 
    z_star_grid_dense_marginal_distrib_long = z_star_grid_dense_marginal_distrib_long, 
    revenue_NC_z_star_grid_dense_long = revenue_NC_z_star_grid_dense_long,
    empirical_QR = empirical_QR
  ))
}

#### Chemicals #### 

# Get estimation objects for Chemicals 
estimation_objects_chemicals <- get_estimation_objects_industry(industry = "Chemicals")

## Start estimation ## 

### Algorithm 1: Gradient-based method, but fix elasticities (grid-search showed that these give the best fit) 

# then solve for optimal parameters 
initial_parameter_chemicals <- c(
  "alpha_eps" = 0.041, 
  "beta_eps" = -0.011, 
  "variance_eps_z" = 4.656e-07, 
  "cost_level" = 1e-08, 
  "theta" = 0.2,
  "cost_elasticity" = 1.04)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_param_chemicals <- optimParallel( 
  par = initial_parameter_chemicals/initial_parameter_chemicals, 
  fn = function(par){
    parameters <- par*initial_parameter_chemicals
    
    # check parameter combinations 
    if(mean(parameters["alpha_eps"] + parameters["beta_eps"]*log(estimation_objects_chemicals$z_star_NC)) < 0.01){
      return(1e12*(1/parameters["alpha_eps"]))
    } else{
      results <- compute_subsidy_DRS_eps_cost_model_joint(
        parameters = c(parameters,"fixed_cost" = 0.0), 
        n_sampling = 10000, 
        z_star_NC = estimation_objects_chemicals$z_star_NC,
        z_star_grid_dense = estimation_objects_chemicals$z_star_grid_dense,
        z_star_grid_dense_long = estimation_objects_chemicals$z_star_grid_dense_long,
        z_tilde_grid_dense_long = estimation_objects_chemicals$z_tilde_grid_dense_long,
        z_star_grid_dense_marginal_distrib_long = estimation_objects_chemicals$z_star_grid_dense_marginal_distrib_long,
        revenue_NC_z_star_grid_dense_long = estimation_objects_chemicals$revenue_NC_z_star_grid_dense_long,
        n_connect = estimation_objects_chemicals$n_connect, 
        empirical_QR = estimation_objects_chemicals$empirical_QR)
      
      print(paste("Objective is: ", results$objective))
      return(results$objective)
    }
  },
  method = "L-BFGS-B",
  lower = c(0.5,0.5,0.5,0.5,0.9,0.95), 
  upper = c(2.0,2.0,2.0,2.0,1.3,1.1) 
)

stopCluster(cl) 

optimal_param_chemicals$par*initial_parameter_chemicals
optimal_param_chemicals$value # R2 about 78.4% (0.216)
optimal_param_chemicals$message # Converged! 

## See if this gives a good fit! 

### Solve for remaining parameters ### 

# solve for correlation rho: -0.999
chemicals_ratio_beta_sigma <- ((optimal_param_chemicals$par*initial_parameter_chemicals)["beta_eps"])/sqrt((optimal_param_chemicals$par*initial_parameter_chemicals)["variance_eps_z"])
chemicals_estimate_rho <- chemicals_ratio_beta_sigma/sqrt((chemicals_ratio_beta_sigma)^2 + 1)

# now find these results 
results_optimal_param_chemicals <- compute_subsidy_DRS_eps_cost_model_joint(
  parameters = c(optimal_param_chemicals$par*initial_parameter_chemicals,
                 "fixed_cost" = 0.0), 
  n_sampling = 50000,
  z_star_NC = estimation_objects_chemicals$z_star_NC,
  z_star_grid_dense = estimation_objects_chemicals$z_star_grid_dense,
  z_star_grid_dense_long = estimation_objects_chemicals$z_star_grid_dense_long,
  z_tilde_grid_dense_long = estimation_objects_chemicals$z_tilde_grid_dense_long,
  z_star_grid_dense_marginal_distrib_long = estimation_objects_chemicals$z_star_grid_dense_marginal_distrib_long,
  revenue_NC_z_star_grid_dense_long = estimation_objects_chemicals$revenue_NC_z_star_grid_dense_long,
  n_connect = estimation_objects_chemicals$n_connect, 
  empirical_QR = estimation_objects_chemicals$empirical_QR
  )

# compute TFPQ quantile ratio in model 
model_chemicals_TFPQ_QR <- compute_baseline_TFPQ_QR(
  data_connect = tibble(
    index = c(1:estimation_objects_chemicals$n_connect),
    TFPQ_resid = results_optimal_param_chemicals$quantiles_model), 
  data_nonconnect = results_optimal_param_chemicals$results_df %>% 
    rename(TFPQ_resid = z_star), 
  cutoff = 0.0)

# Fit looks pretty good!   
plot_industry_TFPQ_QR_fit_chemicals <- ggplot() + 
  geom_point(aes(x = 100*c(1:estimation_objects_chemicals$n_connect)/(estimation_objects_chemicals$n_connect + 1), y = estimation_objects_chemicals$empirical_QR, color = "Data"), shape = 4) + 
  geom_line(aes(x = 100*c(1:estimation_objects_chemicals$n_connect)/(estimation_objects_chemicals$n_connect + 1), y = model_chemicals_TFPQ_QR$TFPQ_QR, color = "Model"), linewidth = 1.0) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ QR") + 
  xlab(label = "TFPQ Percentile") + 
  scale_color_discrete(name = "Type:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom", 
        plot.title = element_text(hjust = 0.5)) + 
  ggtitle("Chemicals")
plot_industry_TFPQ_QR_fit_chemicals

# save parameters
industry_parameters_chemicals <- c(
  optimal_param_chemicals$par*initial_parameter_chemicals,
  "fixed_cost" = 0.0,
  "alpha_tilde" = estimation_objects_chemicals$alpha_tilde,
  "beta_tilde" = estimation_objects_chemicals$beta_tilde,
  "gamma_tilde" = estimation_objects_chemicals$gamma_tilde,
  "eta_tilde" = estimation_objects_chemicals$eta_tilde,
  "elasticity_ratio" = estimation_objects_chemicals$elasticity_ratio, 
  "proba_c" = estimation_objects_chemicals$share_connect, 
  "share_industry" = estimation_objects_chemicals$share_industry,
  "rho" = unname(chemicals_estimate_rho)
)

# convert to df 
industry_parameters_chemicals <- as_tibble(as.list(industry_parameters_chemicals))

# export df
write_csv(x = industry_parameters_chemicals, file = "Computation/data/industry_parameters_chemicals.csv")


#### Metals & Machinery #### 

# Get estimation objects for Metals & Machinery 
estimation_objects_machinery <- get_estimation_objects_industry(industry = "Metals & Machinery")

## Start estimation ## 

### Algorithm 1: Gradient-based method, but fix elasticities (grid-search showed that these give the best fit) 

# then solve for optimal parameters 
initial_parameter_machinery <- c(
  "alpha_eps" = 0.069, 
  "beta_eps" = -0.01347, 
  "variance_eps_z" = 5.796e-07, 
  "cost_level" = 1.007e-08, 
  "theta" = 0.208,
  "cost_elasticity" = 1.11)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_param_machinery <- optimParallel( 
  par = initial_parameter_machinery/initial_parameter_machinery, 
  fn = function(par){
    parameters <- par*initial_parameter_machinery
    
    # check parameter combinations 
    if(mean(parameters["alpha_eps"] + parameters["beta_eps"]*log(estimation_objects_machinery$z_star_NC)) < 0.01){
      return(1e12*(1/parameters["alpha_eps"]))
    } else{
      results <- compute_subsidy_DRS_eps_cost_model_joint(
        parameters = c(parameters,"fixed_cost" = 0.0), 
        n_sampling = 10000, 
        z_star_NC = estimation_objects_machinery$z_star_NC,
        z_star_grid_dense = estimation_objects_machinery$z_star_grid_dense,
        z_star_grid_dense_long = estimation_objects_machinery$z_star_grid_dense_long,
        z_tilde_grid_dense_long = estimation_objects_machinery$z_tilde_grid_dense_long,
        z_star_grid_dense_marginal_distrib_long = estimation_objects_machinery$z_star_grid_dense_marginal_distrib_long,
        revenue_NC_z_star_grid_dense_long = estimation_objects_machinery$revenue_NC_z_star_grid_dense_long,
        n_connect = estimation_objects_machinery$n_connect, 
        empirical_QR = estimation_objects_machinery$empirical_QR)
      
      print(paste("Objective is: ", results$objective))
      return(results$objective)
    }
  },
  method = "L-BFGS-B",
  lower = c(0.9,0.9,0.9,0.9,0.95,0.95), 
  upper = c(1.1,1.1,1.1,1.1,1.05,1.05) 
)

stopCluster(cl) 

optimal_param_machinery$par*initial_parameter_machinery
optimal_param_machinery$value # R2 about 72% (0.28)
optimal_param_machinery$message # converged!! 

## See if this gives a good fit! 

### Solve for remaining parameters ### 

# solve for correlation rho: -0.998
machinery_ratio_beta_sigma <- ((optimal_param_machinery$par*initial_parameter_machinery)["beta_eps"])/sqrt((optimal_param_machinery$par*initial_parameter_machinery)["variance_eps_z"])
machinery_estimate_rho <- machinery_ratio_beta_sigma/sqrt((machinery_ratio_beta_sigma)^2 + 1)

# now find these results 
results_optimal_param_machinery <- compute_subsidy_DRS_eps_cost_model_joint(
  parameters = c(optimal_param_machinery$par*initial_parameter_machinery,
                 "fixed_cost" = 0.0), 
  n_sampling = 50000,
  z_star_NC = estimation_objects_machinery$z_star_NC,
  z_star_grid_dense = estimation_objects_machinery$z_star_grid_dense,
  z_star_grid_dense_long = estimation_objects_machinery$z_star_grid_dense_long,
  z_tilde_grid_dense_long = estimation_objects_machinery$z_tilde_grid_dense_long,
  z_star_grid_dense_marginal_distrib_long = estimation_objects_machinery$z_star_grid_dense_marginal_distrib_long,
  revenue_NC_z_star_grid_dense_long = estimation_objects_machinery$revenue_NC_z_star_grid_dense_long,
  n_connect = estimation_objects_machinery$n_connect, 
  empirical_QR = estimation_objects_machinery$empirical_QR
)

# compute TFPQ quantile ratio in model 
model_machinery_TFPQ_QR <- compute_baseline_TFPQ_QR(
  data_connect = tibble(
    index = c(1:estimation_objects_machinery$n_connect),
    TFPQ_resid = results_optimal_param_machinery$quantiles_model), 
  data_nonconnect = results_optimal_param_machinery$results_df %>% 
    rename(TFPQ_resid = z_star), 
  cutoff = 0.0)

# Fit looks pretty good!   
plot_industry_TFPQ_QR_fit_machinery <- ggplot() + 
  geom_point(aes(x = 100*c(1:estimation_objects_machinery$n_connect)/(estimation_objects_machinery$n_connect + 1), y = estimation_objects_machinery$empirical_QR, color = "Data"), shape = 4) + 
  geom_line(aes(x = 100*c(1:estimation_objects_machinery$n_connect)/(estimation_objects_machinery$n_connect + 1), y = model_machinery_TFPQ_QR$TFPQ_QR, color = "Model"), linewidth = 1.0) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ QR") + 
  xlab(label = "TFPQ Percentile") + 
  scale_color_discrete(name = "Type:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5)) + 
  ggtitle("Metals & Machinery")
plot_industry_TFPQ_QR_fit_machinery

# save parameters
industry_parameters_machinery <- c(
  optimal_param_machinery$par*initial_parameter_machinery,
  "fixed_cost" = 0.0,
  "alpha_tilde" = estimation_objects_machinery$alpha_tilde,
  "beta_tilde" = estimation_objects_machinery$beta_tilde,
  "gamma_tilde" = estimation_objects_machinery$gamma_tilde,
  "eta_tilde" = estimation_objects_machinery$eta_tilde,
  "elasticity_ratio" = estimation_objects_machinery$elasticity_ratio, 
  "proba_c" = estimation_objects_machinery$share_connect, 
  "share_industry" = estimation_objects_machinery$share_industry,
  "rho" = unname(machinery_estimate_rho)
)


# convert to df 
industry_parameters_machinery <- as_tibble(as.list(industry_parameters_machinery))

# export df
write_csv(x = industry_parameters_machinery, file = "Computation/data/industry_parameters_machinery.csv")


#### Food, Beverages & Tobacco ####

# Get estimation objects for Food, Beverages & Tobacco
estimation_objects_food <- get_estimation_objects_industry(industry = "Food, Beverages & Tobacco")

### Algorithm: Gradient-based method with good initial guess (grid-search showed that these give the best fit)

# then solve for optimal parameters
initial_parameter_food <- c(
  "alpha_eps" = 0.045867,
  "beta_eps" = -0.008855,
  "variance_eps_z" = 5.9505e-07,
  "cost_level" = 5.1475e-09,
  "theta" = 0.222,
  "cost_elasticity" = 1.00423)

## Start optim parallel
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_param_food <- optimParallel(
  par = initial_parameter_food/initial_parameter_food,
  fn = function(par){
    parameters <- par*initial_parameter_food

    # check parameter combinations
    if(mean(parameters["alpha_eps"] + parameters["beta_eps"]*log(estimation_objects_food$z_star_NC)) < 0.02){
      return(1e12*(1/parameters["alpha_eps"]))
    } else{
      results <- compute_subsidy_DRS_eps_cost_model_joint(
        parameters = c(parameters,"fixed_cost" = 0.0),
        n_sampling = 10000,
        z_star_NC = estimation_objects_food$z_star_NC,
        z_star_grid_dense = estimation_objects_food$z_star_grid_dense,
        z_star_grid_dense_long = estimation_objects_food$z_star_grid_dense_long,
        z_tilde_grid_dense_long = estimation_objects_food$z_tilde_grid_dense_long,
        z_star_grid_dense_marginal_distrib_long = estimation_objects_food$z_star_grid_dense_marginal_distrib_long,
        revenue_NC_z_star_grid_dense_long = estimation_objects_food$revenue_NC_z_star_grid_dense_long,
        n_connect = estimation_objects_food$n_connect,
        empirical_QR = estimation_objects_food$empirical_QR)

      print(paste("Objective is: ", results$objective))
      return(results$objective)
    }
  },
  method = "L-BFGS-B",
  lower = c(0.5,0.5,0.5,0.5,0.95,0.999),
  upper = c(2.0,2.0,2.0,2.0,1.05,1.01)
)

stopCluster(cl)

optimal_param_food$par*initial_parameter_food
optimal_param_food$value # R2 about 71.1% (0.289)
optimal_param_food$message # Converged!

### Solve for remaining parameters ###

# solve for correlation rho: -0.997
food_ratio_beta_sigma <- ((optimal_param_food$par*initial_parameter_food)["beta_eps"])/sqrt((optimal_param_food$par*initial_parameter_food)["variance_eps_z"])
food_estimate_rho <- food_ratio_beta_sigma/sqrt((food_ratio_beta_sigma)^2 + 1)

# now find these results
results_optimal_param_food <- compute_subsidy_DRS_eps_cost_model_joint(
  parameters = c(optimal_param_food$par*initial_parameter_food,
                 "fixed_cost" = 0.0),
  n_sampling = 50000,
  z_star_NC = estimation_objects_food$z_star_NC,
  z_star_grid_dense = estimation_objects_food$z_star_grid_dense,
  z_star_grid_dense_long = estimation_objects_food$z_star_grid_dense_long,
  z_tilde_grid_dense_long = estimation_objects_food$z_tilde_grid_dense_long,
  z_star_grid_dense_marginal_distrib_long = estimation_objects_food$z_star_grid_dense_marginal_distrib_long,
  revenue_NC_z_star_grid_dense_long = estimation_objects_food$revenue_NC_z_star_grid_dense_long,
  n_connect = estimation_objects_food$n_connect,
  empirical_QR = estimation_objects_food$empirical_QR
)

# compute TFPQ quantile ratio in model
model_food_TFPQ_QR <- compute_baseline_TFPQ_QR(
  data_connect = tibble(
    index = c(1:estimation_objects_food$n_connect),
    TFPQ_resid = results_optimal_param_food$quantiles_model),
  data_nonconnect = results_optimal_param_food$results_df %>%
    rename(TFPQ_resid = z_star),
  cutoff = 0.0)

# Fit is very good! 
plot_industry_TFPQ_QR_fit_food <- ggplot() +
  geom_point(aes(x = 100*c(1:estimation_objects_food$n_connect)/(estimation_objects_food$n_connect + 1), y = estimation_objects_food$empirical_QR, color = "Data"), shape = 4) +
  geom_line(aes(x = 100*c(1:estimation_objects_food$n_connect)/(estimation_objects_food$n_connect + 1), y = model_food_TFPQ_QR$TFPQ_QR, color = "Model"), linewidth = 1.0) +
  geom_hline(yintercept = 0, linetype = "longdash") +
  ylab(label = "TFPQ QR") +
  xlab(label = "TFPQ Percentile") +
  scale_color_discrete(name = "Type:") +
  theme_classic(base_size = 16, base_family = "LM Roman 10") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5)) +
  ggtitle("Food, Beverages & Tobacco")
plot_industry_TFPQ_QR_fit_food

# save parameters
industry_parameters_food <- c(
  optimal_param_food$par*initial_parameter_food,
  "fixed_cost" = 0.0,
  "alpha_tilde" = estimation_objects_food$alpha_tilde,
  "beta_tilde" = estimation_objects_food$beta_tilde,
  "gamma_tilde" = estimation_objects_food$gamma_tilde,
  "eta_tilde" = estimation_objects_food$eta_tilde,
  "elasticity_ratio" = estimation_objects_food$elasticity_ratio,
  "proba_c" = estimation_objects_food$share_connect,
  "share_industry" = estimation_objects_food$share_industry,
  "rho" = unname(food_estimate_rho)
)

# convert to df
industry_parameters_food <- as_tibble(as.list(industry_parameters_food))

# export df
write_csv(x = industry_parameters_food, file = "Computation/data/industry_parameters_food.csv")

#### Non-metallic minerals ####

# Get estimation objects for Non-metallic minerals
estimation_objects_minerals <- get_estimation_objects_industry(industry = "Non-metallic minerals")

### Algorithm 1: Gradient-based method

# start with initial parameters
initial_parameter_minerals <- c(
  "alpha_eps" = 0.0198108,
  "beta_eps" = -0.009549,
  "variance_eps_z" = 7.865905e-07,
  "cost_level" = 7.216463e-09,
  "theta" = 0.2739878,
  "cost_elasticity" = 1.103302)

## Start optim parallel
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_param_minerals <- optimParallel(
  par = initial_parameter_minerals/initial_parameter_minerals,
  fn = function(par){
    parameters <- par*initial_parameter_minerals

    # check parameter combinations
    if(mean(parameters["alpha_eps"] + parameters["beta_eps"]*log(estimation_objects_minerals$z_star_NC)) < 0.005){ # 0.01
      return(1e12*(1/parameters["alpha_eps"]))
    } else{
      results <- compute_subsidy_DRS_eps_cost_model_joint(
        parameters = c(parameters,"fixed_cost" = 0.0),
        n_sampling = 10000,
        z_star_NC = estimation_objects_minerals$z_star_NC,
        z_star_grid_dense = estimation_objects_minerals$z_star_grid_dense,
        z_star_grid_dense_long = estimation_objects_minerals$z_star_grid_dense_long,
        z_tilde_grid_dense_long = estimation_objects_minerals$z_tilde_grid_dense_long,
        z_star_grid_dense_marginal_distrib_long = estimation_objects_minerals$z_star_grid_dense_marginal_distrib_long,
        revenue_NC_z_star_grid_dense_long = estimation_objects_minerals$revenue_NC_z_star_grid_dense_long,
        n_connect = estimation_objects_minerals$n_connect,
        empirical_QR = estimation_objects_minerals$empirical_QR)

      print(paste("Objective is: ", results$objective))
      return(results$objective)
    }
  },
  method = "L-BFGS-B",
  lower = c(0.95,0.95,0.95,0.95,0.95,0.95), #0.3
  upper = c(1.05,1.05,1.05,1.05,1.05,1.05)  #1.1
)

stopCluster(cl)

optimal_param_minerals$par*initial_parameter_minerals
optimal_param_minerals$value # R2 about 59% (0.4087)
optimal_param_minerals$message # Converged!

## See if this gives a good fit!

# solve for correlation rho: -0.996
minerals_ratio_beta_sigma <- ((optimal_param_minerals$par*initial_parameter_minerals)["beta_eps"])/sqrt((optimal_param_minerals$par*initial_parameter_minerals)["variance_eps_z"])
minerals_estimate_rho <- minerals_ratio_beta_sigma/sqrt((minerals_ratio_beta_sigma)^2 + 1)

# now find these results
results_optimal_param_minerals <- compute_subsidy_DRS_eps_cost_model_joint(
  parameters = c(optimal_param_minerals$par*initial_parameter_minerals, #
                 "fixed_cost" = 0.0),
  n_sampling = 50000,
  z_star_NC = estimation_objects_minerals$z_star_NC,
  z_star_grid_dense = estimation_objects_minerals$z_star_grid_dense,
  z_star_grid_dense_long = estimation_objects_minerals$z_star_grid_dense_long,
  z_tilde_grid_dense_long = estimation_objects_minerals$z_tilde_grid_dense_long,
  z_star_grid_dense_marginal_distrib_long = estimation_objects_minerals$z_star_grid_dense_marginal_distrib_long,
  revenue_NC_z_star_grid_dense_long = estimation_objects_minerals$revenue_NC_z_star_grid_dense_long,
  n_connect = estimation_objects_minerals$n_connect,
  empirical_QR = estimation_objects_minerals$empirical_QR
)

# compute TFPQ quantile ratio in model
model_minerals_TFPQ_QR <- compute_baseline_TFPQ_QR(
  data_connect = tibble(
    index = c(1:estimation_objects_minerals$n_connect),
    TFPQ_resid = results_optimal_param_minerals$quantiles_model),
  data_nonconnect = results_optimal_param_minerals$results_df %>%
    rename(TFPQ_resid = z_star),
  cutoff = 0.0)

# Fit could be a bit better!
plot_industry_TFPQ_QR_fit_minerals <- ggplot() +
  geom_point(aes(x = 100*c(1:estimation_objects_minerals$n_connect)/(estimation_objects_minerals$n_connect + 1), y = estimation_objects_minerals$empirical_QR, color = "Data"), shape = 4) +
  geom_line(aes(x = 100*c(1:estimation_objects_minerals$n_connect)/(estimation_objects_minerals$n_connect + 1), y = model_minerals_TFPQ_QR$TFPQ_QR, color = "Model"), linewidth = 1.0) +
  geom_hline(yintercept = 0, linetype = "longdash") +
  ylab(label = "TFPQ QR") +
  xlab(label = "TFPQ Percentile") +
  scale_color_discrete(name = "Type:") +
  theme_classic(base_size = 16, base_family = "LM Roman 10") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5)) +
  ggtitle("Non-metallic Minerals")
plot_industry_TFPQ_QR_fit_minerals

## Save results
industry_parameters_minerals <- c(
  optimal_param_minerals$par*initial_parameter_minerals,
  "fixed_cost" = 0.0,
  "alpha_tilde" = estimation_objects_minerals$alpha_tilde,
  "beta_tilde" = estimation_objects_minerals$beta_tilde,
  "gamma_tilde" = estimation_objects_minerals$gamma_tilde,
  "eta_tilde" = estimation_objects_minerals$eta_tilde,
  "elasticity_ratio" = estimation_objects_minerals$elasticity_ratio,
  "proba_c" = estimation_objects_minerals$share_connect,
  "share_industry" = estimation_objects_minerals$share_industry,
  "rho" = unname(minerals_estimate_rho)
)

# convert to df
industry_parameters_minerals <- as_tibble(as.list(industry_parameters_minerals))

# export df
write_csv(x = industry_parameters_minerals, file = "Computation/data/industry_parameters_minerals.csv")

#### Wood & Paper #### 

# Get estimation objects for Wood & Paper 
estimation_objects_wood <- get_estimation_objects_industry(industry = "Wood & Paper")

## Start estimation ## 

### Algorithm: Gradient-based method, but fix elasticities (grid-search showed that these give the best fit) 

# then solve for optimal parameters 
initial_parameter_wood <- c(
  "alpha_eps" = 0.10, 
  "beta_eps" = -0.0201, 
  "variance_eps_z" = 7.319e-07, 
  "cost_level" = 0.996e-07, 
  "theta" = 0.204,
  "cost_elasticity" = 1.005)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_param_wood <- optimParallel( 
  par = initial_parameter_wood/initial_parameter_wood, 
  fn = function(par){
    parameters <- par*initial_parameter_wood
    
    # check parameter combinations 
    if(mean(parameters["alpha_eps"] + parameters["beta_eps"]*log(estimation_objects_wood$z_star_NC)) < 0.02){
      return(1e12*(1/parameters["alpha_eps"]))
    } else{
      results <- compute_subsidy_DRS_eps_cost_model_joint(
        parameters = c(parameters,"fixed_cost" = 0.0), 
        n_sampling = 10000, 
        z_star_NC = estimation_objects_wood$z_star_NC,
        z_star_grid_dense = estimation_objects_wood$z_star_grid_dense,
        z_star_grid_dense_long = estimation_objects_wood$z_star_grid_dense_long,
        z_tilde_grid_dense_long = estimation_objects_wood$z_tilde_grid_dense_long,
        z_star_grid_dense_marginal_distrib_long = estimation_objects_wood$z_star_grid_dense_marginal_distrib_long,
        revenue_NC_z_star_grid_dense_long = estimation_objects_wood$revenue_NC_z_star_grid_dense_long,
        n_connect = estimation_objects_wood$n_connect, 
        empirical_QR = estimation_objects_wood$empirical_QR)
      
      print(paste("Objective is: ", results$objective))
      return(results$objective)
    }
  },
  method = "L-BFGS-B",
  lower = c(0.5,0.5,0.5,0.5,0.8,0.8), 
  upper = c(1.5,1.5,1.5,1.5,1.2,1.2) 
)

stopCluster(cl) 

optimal_param_wood$par*initial_parameter_wood
optimal_param_wood$value # R2 about 40% (0.60)
optimal_param_wood$message # Converged! 

## See if this gives a good fit! 

### Solve for remaining parameters ### 

# solve for correlation rho: -0.999
wood_ratio_beta_sigma <- ((optimal_param_wood$par*initial_parameter_wood)["beta_eps"])/sqrt((optimal_param_wood$par*initial_parameter_wood)["variance_eps_z"])
wood_estimate_rho <- wood_ratio_beta_sigma/sqrt((wood_ratio_beta_sigma)^2 + 1)

# now find these results 
results_optimal_param_wood <- compute_subsidy_DRS_eps_cost_model_joint(
  parameters = c(optimal_param_wood$par*initial_parameter_wood,
                 "fixed_cost" = 0.0), 
  n_sampling = 50000,
  z_star_NC = estimation_objects_wood$z_star_NC,
  z_star_grid_dense = estimation_objects_wood$z_star_grid_dense,
  z_star_grid_dense_long = estimation_objects_wood$z_star_grid_dense_long,
  z_tilde_grid_dense_long = estimation_objects_wood$z_tilde_grid_dense_long,
  z_star_grid_dense_marginal_distrib_long = estimation_objects_wood$z_star_grid_dense_marginal_distrib_long,
  revenue_NC_z_star_grid_dense_long = estimation_objects_wood$revenue_NC_z_star_grid_dense_long,
  n_connect = estimation_objects_wood$n_connect, 
  empirical_QR = estimation_objects_wood$empirical_QR
)

# compute TFPQ quantile ratio in model 
model_wood_TFPQ_QR <- compute_baseline_TFPQ_QR(
  data_connect = tibble(
    index = c(1:estimation_objects_wood$n_connect),
    TFPQ_resid = results_optimal_param_wood$quantiles_model), 
  data_nonconnect = results_optimal_param_wood$results_df %>% 
    rename(TFPQ_resid = z_star), 
  cutoff = 0.0)

# Fit looks okay  
plot_industry_TFPQ_QR_fit_wood <- ggplot() + 
  geom_point(aes(x = 100*c(1:estimation_objects_wood$n_connect)/(estimation_objects_wood$n_connect + 1), y = estimation_objects_wood$empirical_QR, color = "Data"), shape = 4) + 
  geom_line(aes(x = 100*c(1:estimation_objects_wood$n_connect)/(estimation_objects_wood$n_connect + 1), y = model_wood_TFPQ_QR$TFPQ_QR, color = "Model"), linewidth = 1.0) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ QR") + 
  xlab(label = "TFPQ Percentile") + 
  scale_color_discrete(name = "Type:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5)) + 
  ggtitle("Wood & Paper")
plot_industry_TFPQ_QR_fit_wood


## Save results 
industry_parameters_wood <- c(
  optimal_param_wood$par*initial_parameter_wood,
  "fixed_cost" = 0.0,
  "alpha_tilde" = estimation_objects_wood$alpha_tilde,
  "beta_tilde" = estimation_objects_wood$beta_tilde,
  "gamma_tilde" = estimation_objects_wood$gamma_tilde,
  "eta_tilde" = estimation_objects_wood$eta_tilde,
  "elasticity_ratio" = estimation_objects_wood$elasticity_ratio, 
  "proba_c" = estimation_objects_wood$share_connect, 
  "share_industry" = estimation_objects_wood$share_industry,
  "rho" = unname(wood_estimate_rho)
)

# convert to df 
industry_parameters_wood <- as_tibble(as.list(industry_parameters_wood))

# export df
write_csv(x = industry_parameters_wood, file = "Computation/data/industry_parameters_wood.csv")


#### Textiles #### 

# Get estimation objects for Textiles 
estimation_objects_textiles <- get_estimation_objects_industry(industry = "Textiles")

### Algorithm: Gradient-based method, but fix elasticities (grid-search showed that these give the best fit) 

# then solve for optimal parameters 
initial_parameter_textiles <- c(
  "alpha_eps" = 0.046, 
  "beta_eps" = -0.0158, 
  "variance_eps_z" = 1.98e-07, 
  "cost_level" = 1.19e-08, 
  "theta" = 0.294,
  "cost_elasticity" = 1.44)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_param_textiles <- optimParallel( 
  par = initial_parameter_textiles/initial_parameter_textiles, 
  fn = function(par){
    parameters <- par*initial_parameter_textiles
    
    # check parameter combinations 
    if(mean(parameters["alpha_eps"] + parameters["beta_eps"]*log(estimation_objects_textiles$z_star_NC)) < 0.01){
      return(1e12*(1/parameters["alpha_eps"]))
    } else{
      results <- compute_subsidy_DRS_eps_cost_model_joint(
        parameters = c(parameters,"fixed_cost" = 0.0), 
        n_sampling = 10000, 
        z_star_NC = estimation_objects_textiles$z_star_NC,
        z_star_grid_dense = estimation_objects_textiles$z_star_grid_dense,
        z_star_grid_dense_long = estimation_objects_textiles$z_star_grid_dense_long,
        z_tilde_grid_dense_long = estimation_objects_textiles$z_tilde_grid_dense_long,
        z_star_grid_dense_marginal_distrib_long = estimation_objects_textiles$z_star_grid_dense_marginal_distrib_long,
        revenue_NC_z_star_grid_dense_long = estimation_objects_textiles$revenue_NC_z_star_grid_dense_long,
        n_connect = estimation_objects_textiles$n_connect, 
        empirical_QR = estimation_objects_textiles$empirical_QR)
      
      print(paste("Objective is: ", results$objective))
      return(results$objective)
    }
  },
  method = "L-BFGS-B",
  lower = c(0.5,0.5,0.5,0.5,0.95,0.8), 
  upper = c(2.0,2.0,2.0,2.0,1.05,1.2) 
)

stopCluster(cl) 

optimal_param_textiles$par*initial_parameter_textiles
optimal_param_textiles$value # R2 about 75% (0.25)
optimal_param_textiles$message # 

## See if this gives a good fit! 

### Solve for remaining parameters ### 

# solve for correlation rho: -0.996
textiles_ratio_beta_sigma <- ((optimal_param_textiles$par*initial_parameter_textiles)["beta_eps"])/sqrt((optimal_param_textiles$par*initial_parameter_textiles)["variance_eps_z"])
textiles_estimate_rho <- textiles_ratio_beta_sigma/sqrt((textiles_ratio_beta_sigma)^2 + 1)

# now find these results 
results_optimal_param_textiles <- compute_subsidy_DRS_eps_cost_model_joint(
  parameters = c(optimal_param_textiles$par*initial_parameter_textiles,
                 "fixed_cost" = 0.0), 
  n_sampling = 50000,
  z_star_NC = estimation_objects_textiles$z_star_NC,
  z_star_grid_dense = estimation_objects_textiles$z_star_grid_dense,
  z_star_grid_dense_long = estimation_objects_textiles$z_star_grid_dense_long,
  z_tilde_grid_dense_long = estimation_objects_textiles$z_tilde_grid_dense_long,
  z_star_grid_dense_marginal_distrib_long = estimation_objects_textiles$z_star_grid_dense_marginal_distrib_long,
  revenue_NC_z_star_grid_dense_long = estimation_objects_textiles$revenue_NC_z_star_grid_dense_long,
  n_connect = estimation_objects_textiles$n_connect, 
  empirical_QR = estimation_objects_textiles$empirical_QR
)

# compute TFPQ quantile ratio in model 
model_textiles_TFPQ_QR <- compute_baseline_TFPQ_QR(
  data_connect = tibble(
    index = c(1:estimation_objects_textiles$n_connect),
    TFPQ_resid = results_optimal_param_textiles$quantiles_model), 
  data_nonconnect = results_optimal_param_textiles$results_df %>% 
    rename(TFPQ_resid = z_star), 
  cutoff = 0.0)

# Fit looks pretty good
plot_industry_TFPQ_QR_fit_textiles <- ggplot() + 
  geom_point(aes(x = 100*c(1:estimation_objects_textiles$n_connect)/(estimation_objects_textiles$n_connect + 1), y = estimation_objects_textiles$empirical_QR, color = "Data"), shape = 4) + 
  geom_line(aes(x = 100*c(1:estimation_objects_textiles$n_connect)/(estimation_objects_textiles$n_connect + 1), y = model_textiles_TFPQ_QR$TFPQ_QR, color = "Model"), linewidth = 1.0) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ QR") + 
  xlab(label = "TFPQ Percentile") + 
  scale_color_discrete(name = "Type:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5)) + 
  ggtitle("Textiles")
plot_industry_TFPQ_QR_fit_textiles

## Save results 
industry_parameters_textiles <- c(
  optimal_param_textiles$par*initial_parameter_textiles,
  "fixed_cost" = 0.0,
  "alpha_tilde" = estimation_objects_textiles$alpha_tilde,
  "beta_tilde" = estimation_objects_textiles$beta_tilde,
  "gamma_tilde" = estimation_objects_textiles$gamma_tilde,
  "eta_tilde" = estimation_objects_textiles$eta_tilde,
  "elasticity_ratio" = estimation_objects_textiles$elasticity_ratio, 
  "proba_c" = estimation_objects_textiles$share_connect,
  "share_industry" = estimation_objects_textiles$share_industry,
  "rho" = unname(textiles_estimate_rho)
)

# convert to df 
industry_parameters_textiles <- as_tibble(as.list(industry_parameters_textiles))

# export df
write_csv(x = industry_parameters_textiles, file = "Computation/data/industry_parameters_textiles.csv")


#### Save joint plot ####

## Create joint plot with results (incl estimation fit) 
ggarrange(
  plot_industry_TFPQ_QR_fit_textiles, 
  plot_industry_TFPQ_QR_fit_wood, 
  plot_industry_TFPQ_QR_fit_minerals, 
  plot_industry_TFPQ_QR_fit_food, 
  plot_industry_TFPQ_QR_fit_machinery,
  plot_industry_TFPQ_QR_fit_chemicals, 
  ncol = 3, nrow = 2, 
  common.legend = T, legend = "bottom")
ggsave(width = 12, height = 5, dpi = 200,
       "Paper writing/Revision JPE Macro/Final paper/Paper/Figures/Figure_industry_TFPQ_QR_fit.png") 
ggsave(width = 12, height = 5, dpi = 200,
       "Paper writing/Revision JPE Macro/Final paper/Publication Figures/Figure7_network_extension.pdf") 

#### Average R2 across industries #### 

# 66% average R2 
mean(c(
  1 - optimal_param_textiles$value, 
  1 - optimal_param_wood$value, 
  1 - optimal_param_minerals$value,
  1 - optimal_param_chemicals$value,
  1 - optimal_param_machinery$value, 
  1 - optimal_param_food$value)) 

#### Average (implied) subsidies across industries ####

summary(results_optimal_param_textiles$results_df$optimal_subsidy)
summary(results_optimal_param_wood$results_df$optimal_subsidy)
summary(results_optimal_param_minerals$results_df$optimal_subsidy)
summary(results_optimal_param_chemicals$results_df$optimal_subsidy)
summary(results_optimal_param_machinery$results_df$optimal_subsidy)
summary(results_optimal_param_food$results_df$optimal_subsidy)

#### Export results #### 

## z_star_grids
z_star_grid_industries <- tibble(
  chemicals = estimation_objects_chemicals$z_star_grid_dense, 
  machinery = estimation_objects_machinery$z_star_grid_dense, 
  food = estimation_objects_food$z_star_grid_dense, 
  minerals = estimation_objects_minerals$z_star_grid_dense, 
  wood = estimation_objects_wood$z_star_grid_dense, 
  textiles = estimation_objects_textiles$z_star_grid_dense
)
write_csv(x = z_star_grid_industries, file = "Computation/data/industry_z_star_grid.csv")


## z_star_distributions 
z_star_distributions_industries <- tibble(
  chemicals = estimation_objects_chemicals$z_star_grid_dense_marginal_distrib, 
  machinery = estimation_objects_machinery$z_star_grid_dense_marginal_distrib, 
  food = estimation_objects_food$z_star_grid_dense_marginal_distrib, 
  minerals = estimation_objects_minerals$z_star_grid_dense_marginal_distrib, 
  wood = estimation_objects_wood$z_star_grid_dense_marginal_distrib, 
  textiles = estimation_objects_textiles$z_star_grid_dense_marginal_distrib
)
write_csv(x = z_star_distributions_industries, file = "Computation/data/industry_z_star_distribution.csv")

## Also make sure to export nu & IO-elasticities 

# load IO table 
io_elasticities <- readxl::read_xlsx(path = "Empirical Analysis/data/IDN-WIOD-1997.xlsx", sheet = "relative-table")

# save final use shares 
nu_industries <- io_elasticities$`Total Final Use`[3:8]
nu_industries <- as.numeric(nu_industries[c(4,6,1,5,3,2)])
write_rds(x = nu_industries, file = "Computation/data/industry_final_use_shares.R")

# save intermediate input shares 
interm_shares_industries <- io_elasticities[3:8,4:9]
interm_shares_industries <- interm_shares_industries[c(4,6,1,5,3,2),c(4,6,1,5,3,2)]
interm_shares_industries <- apply(interm_shares_industries, 2, as.numeric)
write_csv(x = as_tibble(interm_shares_industries), file = "Computation/data/industry_io_elasticities.csv")




#########################################
###### Export data for replication ###### 
#########################################

# export main firms dataset 
write_csv(x = firms_1997 %>% 
            select(rvads,routs,connected,
                   capital_share_resid,labor_share_resid,intermediate_share_resid,
                   log_TFPQ_resid,log_TFPQ_wedges_resid,
                   RandD_share_resid), 
          file = "Empirical Analysis/output/firm_replication_data.csv") 

# export other data 
write_csv(x = tibble(log_z_star_NC_survivors), 
          file = "Empirical Analysis/output/log_z_star_NC_survivors.csv")
write_csv(x = tibble(log_z_star_next_NC_survivors), 
          file = "Empirical Analysis/output/log_z_star_next_NC_survivors.csv")

# export gross elasticities 
write_csv(x = gross_elasticities, 
          file = "Empirical Analysis/output/gross_elasticities.csv")
